From 0bf5df3b58e152ba2a50f9918f31a9c15b479b64 Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sat, 5 Mar 2016 06:59:51 +0100 Subject: drm/amdgpu: delete set-but-not-read member has_uvd from amdgpu_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up leftover from radeon code. Signed-off-by: Nils Wallménius Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0c42a85ca5a5..cfd35b0fce06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2053,7 +2053,6 @@ struct amdgpu_device { struct amdgpu_sdma sdma; /* uvd */ - bool has_uvd; struct amdgpu_uvd uvd; /* vce */ -- cgit From 11afbde85eeba1ccb0a459c49444aaf9032e9755 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 3 Mar 2016 11:38:48 +0800 Subject: drm/amdgpu: add hdp_invalidate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's called after emitting ibs. Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cfd35b0fce06..497ca587cc35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -329,6 +329,7 @@ struct amdgpu_ring_funcs { void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); + void (*emit_hdp_invalidate)(struct amdgpu_ring *ring); void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, @@ -2230,6 +2231,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) +#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index db14a7bbb8f4..4e978e7aa1b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -178,6 +178,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ring->current_ctx = ctx; } + if (vm) { + if (ring->funcs->emit_hdp_invalidate) + amdgpu_ring_emit_hdp_invalidate(ring); + } + r = amdgpu_fence_emit(ring, owner, &ib->fence); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); -- cgit From 211dff5518d938d0c11205af3113d2c7a292dd98 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 22 Feb 2016 15:40:59 +0100 Subject: drm/amdgpu: group userptr in the BO list v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need them together with the next patch. v2: Don't take bo reference twice Signed-off-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 24 +++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 497ca587cc35..11a9f5899995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1048,7 +1048,7 @@ struct amdgpu_bo_list { struct amdgpu_bo *gds_obj; struct amdgpu_bo *gws_obj; struct amdgpu_bo *oa_obj; - bool has_userptr; + unsigned first_userptr; unsigned num_entries; struct amdgpu_bo_list_entry *array; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4792f9d0b7d4..9763e52886fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -91,7 +91,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo; struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo; - bool has_userptr = false; + unsigned last_entry = 0, first_userptr = num_entries; unsigned i; int r; @@ -101,8 +101,9 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); for (i = 0; i < num_entries; ++i) { - struct amdgpu_bo_list_entry *entry = &array[i]; + struct amdgpu_bo_list_entry *entry; struct drm_gem_object *gobj; + struct amdgpu_bo *bo; struct mm_struct *usermm; gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); @@ -111,19 +112,24 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, goto error_free; } - entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); drm_gem_object_unreference_unlocked(gobj); - entry->priority = min(info[i].bo_priority, - AMDGPU_BO_LIST_MAX_PRIORITY); - usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm); + + usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm) { if (usermm != current->mm) { - amdgpu_bo_unref(&entry->robj); + amdgpu_bo_unref(&bo); r = -EPERM; goto error_free; } - has_userptr = true; + entry = &array[--first_userptr]; + } else { + entry = &array[last_entry++]; } + + entry->robj = bo; + entry->priority = min(info[i].bo_priority, + AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; @@ -145,7 +151,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, list->gds_obj = gds_obj; list->gws_obj = gws_obj; list->oa_obj = oa_obj; - list->has_userptr = has_userptr; + list->first_userptr = first_userptr; list->array = array; list->num_entries = num_entries; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52c3eb96b199..7833dfb1ff6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -350,7 +350,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); if (p->bo_list) { - need_mmap_lock = p->bo_list->has_userptr; + need_mmap_lock = p->bo_list->first_userptr != + p->bo_list->num_entries; amdgpu_bo_list_get_list(p->bo_list, &p->validated); } -- cgit From 336d1f5efe93db3d997a6d105760dd613d7ecdce Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Feb 2016 10:57:10 +0100 Subject: drm/amdgpu: remove HW fence owner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used any more since we now always use the sheduler. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 ++------- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 +----- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 8 +------- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 +-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 +-- 12 files changed, 16 insertions(+), 43 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 11a9f5899995..fab6ddb26b5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -409,9 +409,6 @@ struct amdgpu_fence { struct amdgpu_ring *ring; uint64_t seq; - /* filp or special value for fence creator */ - void *owner; - wait_queue_t fence_wake; }; @@ -432,8 +429,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, - struct amdgpu_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -1177,8 +1173,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ib, void *owner, - struct fence *last_vm_update, + struct amdgpu_ib *ib, struct fence *last_vm_update, struct fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97db196dc6f8..d94b13ac290f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -91,25 +91,21 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * amdgpu_fence_emit - emit a fence on the requested ring * * @ring: ring the fence is associated with - * @owner: creator of the fence * @fence: amdgpu fence object * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, - struct amdgpu_fence **fence) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence **fence) { struct amdgpu_device *adev = ring->adev; - /* we are protected by the ring emission mutex */ *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; } (*fence)->seq = ++ring->fence_drv.sync_seq; (*fence)->ring = ring; - (*fence)->owner = owner; fence_init(&(*fence)->base, &amdgpu_fence_ops, &ring->fence_drv.fence_queue.lock, adev->fence_context + ring->idx, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4e978e7aa1b8..9550247b030d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -101,7 +101,6 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) * @adev: amdgpu_device pointer * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule - * @owner: owner for creating the fences * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). @@ -118,8 +117,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) * to SI there was just a DE IB. */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, void *owner, - struct fence *last_vm_update, + struct amdgpu_ib *ibs, struct fence *last_vm_update, struct fence **f) { struct amdgpu_device *adev = ring->adev; @@ -183,7 +181,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_hdp_invalidate(ring); } - r = amdgpu_fence_emit(ring, owner, &ib->fence); + r = amdgpu_fence_emit(ring, &ib->fence); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); ring->current_ctx = old_ctx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 90e52f7e17a0..692b45560d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -148,7 +148,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) } trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner, + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->sync.last_vm_update, &fence); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 87690cc57206..e3673422aac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -60,12 +60,8 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) */ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) { - struct amdgpu_fence *a_fence = to_amdgpu_fence(f); struct amd_sched_fence *s_fence = to_amd_sched_fence(f); - if (a_fence) - return a_fence->ring->adev == adev; - if (s_fence) { struct amdgpu_ring *ring; @@ -85,13 +81,11 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) */ static void *amdgpu_sync_get_owner(struct fence *f) { - struct amdgpu_fence *a_fence = to_amdgpu_fence(f); struct amd_sched_fence *s_fence = to_amd_sched_fence(f); if (s_fence) return s_fence->owner; - else if (a_fence) - return a_fence->owner; + return AMDGPU_FENCE_OWNER_UNDEFINED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 1de82bf4fc79..e5f0a5e29551 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -886,8 +886,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, - AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); if (r) goto err_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 39c3aa60381a..6d191fb40b38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -425,8 +425,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); if (r) goto err; @@ -487,9 +486,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, - AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c4101ebd2b3f..b5b4220a6141 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -643,8 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1b713009c5d2..46c2436d74bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2136,8 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err2; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index aeb35f504999..e0b64de9b5af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -706,8 +706,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err2; @@ -1262,8 +1261,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); /* shedule the ib on the ring */ - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) { DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 134d5d0bb77f..747ef558033a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -701,8 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ca12d6faa8c2..11a544fbf197 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -853,8 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, - NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err1; -- cgit From 257bf15a4b9795f8b352beb6e72a7e3e5aab8d27 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Feb 2016 11:24:58 +0100 Subject: drm/amdgpu: add slap cache for sync objects as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need them all the time. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 36 ++++++++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index fab6ddb26b5b..3e4ec56919c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -634,6 +634,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_sync *sync); +int amdgpu_sync_init(void); +void amdgpu_sync_fini(void); /* * GART structures, functions & helpers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ce79a8b605a0..875333bb4e45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -539,6 +539,7 @@ static struct pci_driver amdgpu_kms_pci_driver = { static int __init amdgpu_init(void) { + amdgpu_sync_init(); #ifdef CONFIG_VGA_CONSOLE if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); @@ -563,6 +564,7 @@ static void __exit amdgpu_exit(void) amdgpu_amdkfd_fini(); drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); + amdgpu_sync_fini(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index e3673422aac8..c48b4fce5e57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -37,6 +37,8 @@ struct amdgpu_sync_entry { struct fence *fence; }; +static struct kmem_cache *amdgpu_sync_slab; + /** * amdgpu_sync_create - zero init sync object * @@ -133,7 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; } - e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); + e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) return -ENOMEM; @@ -214,7 +216,7 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) f = e->fence; hash_del(&e->node); - kfree(e); + kmem_cache_free(amdgpu_sync_slab, e); if (!fence_is_signaled(f)) return f; @@ -237,7 +239,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync) hash_del(&e->node); fence_put(e->fence); - kfree(e); + kmem_cache_free(amdgpu_sync_slab, e); } return 0; @@ -259,8 +261,34 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) hash_for_each_safe(sync->fences, i, tmp, e, node) { hash_del(&e->node); fence_put(e->fence); - kfree(e); + kmem_cache_free(amdgpu_sync_slab, e); } fence_put(sync->last_vm_update); } + +/** + * amdgpu_sync_init - init sync object subsystem + * + * Allocate the slab allocator. + */ +int amdgpu_sync_init(void) +{ + amdgpu_sync_slab = kmem_cache_create( + "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_sync_slab) + return -ENOMEM; + + return 0; +} + +/** + * amdgpu_sync_fini - fini sync object subsystem + * + * Free the slab allocator. + */ +void amdgpu_sync_fini(void) +{ + kmem_cache_destroy(amdgpu_sync_slab); +} -- cgit From 364beb2cc45247e980a097e53d0932e143873333 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Feb 2016 17:39:39 +0100 Subject: drm/amdgpu: return the common fence from amdgpu_fence_emit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to avoid using the hardware specific fences even more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 27 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 7 +++---- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3e4ec56919c7..128eba604f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -429,7 +429,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -766,7 +766,7 @@ struct amdgpu_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; - struct amdgpu_fence *fence; + struct fence *fence; struct amdgpu_user_fence *user; struct amdgpu_vm *vm; unsigned vm_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d94b13ac290f..83599f2a0387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -91,28 +91,29 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * amdgpu_fence_emit - emit a fence on the requested ring * * @ring: ring the fence is associated with - * @fence: amdgpu fence object + * @f: resulting fence object * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence **fence) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_fence *fence; - *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); - if ((*fence) == NULL) { + fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); + if (fence == NULL) return -ENOMEM; - } - (*fence)->seq = ++ring->fence_drv.sync_seq; - (*fence)->ring = ring; - fence_init(&(*fence)->base, &amdgpu_fence_ops, - &ring->fence_drv.fence_queue.lock, - adev->fence_context + ring->idx, - (*fence)->seq); + + fence->seq = ++ring->fence_drv.sync_seq; + fence->ring = ring; + fence_init(&fence->base, &amdgpu_fence_ops, + &ring->fence_drv.fence_queue.lock, + adev->fence_context + ring->idx, + fence->seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - (*fence)->seq, - AMDGPU_FENCE_FLAG_INT); + fence->seq, AMDGPU_FENCE_FLAG_INT); + *f = &fence->base; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 9550247b030d..979c445f8096 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -90,9 +90,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) { - amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); - if (ib->fence) - fence_put(&ib->fence->base); + amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); + fence_put(ib->fence); } /** @@ -198,7 +197,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if (f) - *f = fence_get(&ib->fence->base); + *f = fence_get(ib->fence); amdgpu_ring_commit(ring); return 0; -- cgit From cffadc83c7016ba68721affe5da537ead279e2d2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Mar 2016 13:34:49 +0100 Subject: drm/amdgpu: move the GDS switch into vm flush as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After all it's an operation on the VMID. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 11 ++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 128eba604f97..b6fae4b301d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -957,8 +957,10 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct fence *fence, unsigned *vm_id, uint64_t *vm_pd_addr); void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, - uint64_t pd_addr); + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 979c445f8096..e63e57e51db7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -150,13 +150,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (vm) { /* do context switch */ - amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr); - - if (ring->funcs->emit_gds_switch) - amdgpu_ring_emit_gds_switch(ring, ib->vm_id, - ib->gds_base, ib->gds_size, - ib->gws_base, ib->gws_size, - ib->oa_base, ib->oa_size); + amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, + ib->gds_base, ib->gds_size, + ib->gws_base, ib->gws_size, + ib->oa_base, ib->oa_size); if (ring->funcs->emit_hdp_flush) amdgpu_ring_emit_hdp_flush(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 364db7c45c67..5fab5b25b935 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -241,19 +241,27 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vmid: vmid number to use + * @vm_id: vmid number to use * @pd_addr: address of the page directory * * Emit a VM flush when it is necessary. */ void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, - uint64_t pd_addr) + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) { if (pd_addr != AMDGPU_VM_NO_FLUSH) { - trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid); - amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr); + trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); + amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); } + + if (ring->funcs->emit_gds_switch) + amdgpu_ring_emit_gds_switch(ring, vm_id, + gds_base, gds_size, + gws_base, gws_size, + oa_base, oa_size); } /** -- cgit From 971fe9a9414b2ccabc11ff6a5ff6be0d6f2dabda Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Mar 2016 15:09:25 +0100 Subject: drm/amdgpu: switch the GDS only on demand v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switching the GDS space to often seems to be problematic. This patch together with the following can avoid VM faults on context switch. v2: extend commit message a bit Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Chunming Zhou (v1) --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 43 ++++++++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b6fae4b301d7..866b5fa298e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -922,6 +922,13 @@ struct amdgpu_vm_manager_id { struct list_head list; struct fence *active; atomic_long_t owner; + + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_base; + uint32_t oa_size; }; struct amdgpu_vm_manager { @@ -961,6 +968,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size); +void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index e63e57e51db7..798d46626820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -165,6 +165,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ib->ctx != ctx || ib->vm != vm) { ring->current_ctx = old_ctx; + if (ib->vm_id) + amdgpu_vm_reset_id(adev, ib->vm_id); amdgpu_ring_undo(ring); return -EINVAL; } @@ -181,6 +183,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); ring->current_ctx = old_ctx; + if (ib->vm_id) + amdgpu_vm_reset_id(adev, ib->vm_id); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5fab5b25b935..8642a1ccd6c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -252,16 +252,53 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + if (pd_addr != AMDGPU_VM_NO_FLUSH) { trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); } - if (ring->funcs->emit_gds_switch) + if (ring->funcs->emit_gds_switch && ( + mgr_id->gds_base != gds_base || + mgr_id->gds_size != gds_size || + mgr_id->gws_base != gws_base || + mgr_id->gws_size != gws_size || + mgr_id->oa_base != oa_base || + mgr_id->oa_size != oa_size)) { + + mgr_id->gds_base = gds_base; + mgr_id->gds_size = gds_size; + mgr_id->gws_base = gws_base; + mgr_id->gws_size = gws_size; + mgr_id->oa_base = oa_base; + mgr_id->oa_size = oa_size; amdgpu_ring_emit_gds_switch(ring, vm_id, gds_base, gds_size, gws_base, gws_size, oa_base, oa_size); + } +} + +/** + * amdgpu_vm_reset_id - reset VMID to zero + * + * @adev: amdgpu device structure + * @vm_id: vmid number to use + * + * Reset saved GDW, GWS and OA to force switch on next flush. + */ +void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) +{ + struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + + mgr_id->gds_base = 0; + mgr_id->gds_size = 0; + mgr_id->gws_base = 0; + mgr_id->gws_size = 0; + mgr_id->oa_base = 0; + mgr_id->oa_size = 0; } /** @@ -1425,9 +1462,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&adev->vm_manager.ids_lru); /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < adev->vm_manager.num_ids; ++i) + for (i = 1; i < adev->vm_manager.num_ids; ++i) { + amdgpu_vm_reset_id(adev, i); list_add_tail(&adev->vm_manager.ids[i].list, &adev->vm_manager.ids_lru); + } atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); } -- cgit From b8c7b39ec1db98199df5bee33ca145ed9c2c62e7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Mar 2016 15:42:52 +0100 Subject: drm/amdgpu: split pipeline sync and vm flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to use the pipeline sync for other tasks as well. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 29 ++++++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 +++++++++-- 4 files changed, 35 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 866b5fa298e7..e0727de9c2b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -326,6 +326,7 @@ struct amdgpu_ring_funcs { struct amdgpu_ib *ib); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); + void (*emit_pipeline_sync)(struct amdgpu_ring *ring); void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); @@ -2234,6 +2235,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) +#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8642a1ccd6c3..9a9abbab9b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -257,6 +257,8 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, if (pd_addr != AMDGPU_VM_NO_FLUSH) { trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); + if (ring->funcs->emit_pipeline_sync) + amdgpu_ring_emit_pipeline_sync(ring); amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 46c2436d74bd..a4913ebb673c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3041,6 +3041,26 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) return 0; } +/** + * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP + * + * @ring: the ring to emmit the commands to + * + * Sync the command pipeline with the PFP. E.g. wait for everything + * to be completed. + */ +static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + if (usepfp) { + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } +} + /* * vm * VMID 0 is the physical GPU addresses as used by the kernel. @@ -3059,13 +3079,6 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); - if (usepfp) { - /* synce CE with ME to prevent CE fetch CEIB before context switch done */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); - amdgpu_ring_write(ring, 0); - } amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -5147,6 +5160,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .parse_cs = NULL, .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, + .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, @@ -5164,6 +5178,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .parse_cs = NULL, .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, + .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e0b64de9b5af..828e205a7326 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4692,8 +4692,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, } -static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) +static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); uint32_t seq = ring->fence_drv.sync_seq; @@ -4715,6 +4714,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } +} + +static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -5037,6 +5042,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .parse_cs = NULL, .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, + .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, @@ -5054,6 +5060,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .parse_cs = NULL, .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, + .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, -- cgit From 2f568dbd6b944c2e8c0c54b53c2211c23995e6a4 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 23 Feb 2016 12:36:59 +0100 Subject: drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That avoids lock inversion between the BO reservation lock and the anon_vma lock. v2: * Changed amdgpu_bo_list_entry.user_pages to an array of pointers * Lock mmap_sem only for get_user_pages * Added invalidation of unbound userpointer BOs * Fixed memory leak and page reference leak v3 (chk): * Revert locking mmap_sem only for_get user_pages * Revert adding invalidation of unbound userpointer BOs * Sanitize and fix error handling v4 (chk): * Init userpages pointer everywhere. * Fix error handling when get_user_pages() fails. * Add invalidation of unbound userpointer BOs again. v5 (chk): * Add maximum number of tries. v6 (chk): * Fix error handling when we run out of tries. Signed-off-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling (v4) Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 114 +++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 23 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 53 +++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 + 6 files changed, 174 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e0727de9c2b2..e32ab13227d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -471,6 +471,8 @@ struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; uint32_t priority; + struct page **user_pages; + int user_invalidated; }; struct amdgpu_bo_va_mapping { @@ -2365,11 +2367,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, struct amdgpu_ring **out_ring); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); +int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 9763e52886fe..eacd810fc09b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -200,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&list->array[i].tv.head, &bucket[priority]); + list->array[i].user_pages = NULL; } /* Connect the sorted buckets in the output list. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 7833dfb1ff6e..4f5ef4149e87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -111,6 +111,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; + p->uf_entry.user_pages = NULL; drm_gem_object_unreference_unlocked(gobj); return 0; @@ -297,6 +298,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = lobj->robj; + bool binding_userptr = false; struct mm_struct *usermm; uint32_t domain; @@ -304,6 +306,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; + /* Check if we have user pages and nobody bound the BO already */ + if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { + size_t size = sizeof(struct page *); + + size *= bo->tbo.ttm->num_pages; + memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); + binding_userptr = true; + } + if (bo->pin_count) continue; @@ -334,6 +345,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, } return r; } + + if (binding_userptr) { + drm_free_large(lobj->user_pages); + lobj->user_pages = NULL; + } } return 0; } @@ -342,8 +358,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_bo_list_entry *e; struct list_head duplicates; bool need_mmap_lock = false; + unsigned i, tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -364,9 +382,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (need_mmap_lock) down_read(¤t->mm->mmap_sem); - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) - goto error_reserve; + while (1) { + struct list_head need_pages; + unsigned i; + + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) + goto error_free_pages; + + /* Without a BO list we don't have userptr BOs */ + if (!p->bo_list) + break; + + INIT_LIST_HEAD(&need_pages); + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + + e = &p->bo_list->array[i]; + + if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, + &e->user_invalidated) && e->user_pages) { + + /* We acquired a page array, but somebody + * invalidated it. Free it an try again + */ + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + e->user_pages = NULL; + } + + if (e->robj->tbo.ttm->state != tt_bound && + !e->user_pages) { + list_del(&e->tv.head); + list_add(&e->tv.head, &need_pages); + + amdgpu_bo_unreserve(e->robj); + } + } + + if (list_empty(&need_pages)) + break; + + /* Unreserve everything again. */ + ttm_eu_backoff_reservation(&p->ticket, &p->validated); + + /* We tried to often, just abort */ + if (!--tries) { + r = -EDEADLK; + goto error_free_pages; + } + + /* Fill the page arrays for all useptrs. */ + list_for_each_entry(e, &need_pages, tv.head) { + struct ttm_tt *ttm = e->robj->tbo.ttm; + + e->user_pages = drm_calloc_large(ttm->num_pages, + sizeof(struct page*)); + if (!e->user_pages) { + r = -ENOMEM; + goto error_free_pages; + } + + r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); + if (r) { + drm_free_large(e->user_pages); + e->user_pages = NULL; + goto error_free_pages; + } + } + + /* And try again. */ + list_splice(&need_pages, &p->validated); + } amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); @@ -398,10 +488,26 @@ error_validate: ttm_eu_backoff_reservation(&p->ticket, &p->validated); } -error_reserve: +error_free_pages: + if (need_mmap_lock) up_read(¤t->mm->mmap_sem); + if (p->bo_list) { + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + e = &p->bo_list->array[i]; + + if (!e->user_pages) + continue; + + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + } + } + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 2e26a517f2d6..cb27754ec07f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { down_read(¤t->mm->mmap_sem); + + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + bo->tbo.ttm->pages); + if (r) + goto unlock_mmap_sem; + r = amdgpu_bo_reserve(bo, true); - if (r) { - up_read(¤t->mm->mmap_sem); - goto release_object; - } + if (r) + goto free_pages; amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); amdgpu_bo_unreserve(bo); - up_read(¤t->mm->mmap_sem); if (r) - goto release_object; + goto free_pages; + + up_read(¤t->mm->mmap_sem); } r = drm_gem_handle_create(filp, gobj, &handle); @@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, args->handle = handle; return 0; +free_pages: + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); + +unlock_mmap_sem: + up_read(¤t->mm->mmap_sem); + release_object: drm_gem_object_unreference_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 53e627970798..ed0d2e7cc57d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -508,22 +508,18 @@ struct amdgpu_ttm_tt { uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; + atomic_t mmu_invalidations; }; -/* prepare the sg table with the user pages */ -static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned pinned = 0, nents; - int r; - int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); - enum dma_data_direction direction = write ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + unsigned pinned = 0; + int r; if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* check that we only pin down anonymous memory + /* check that we only use anonymous memory to prevent problems with writeback */ unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; @@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **pages = ttm->pages + pinned; + struct page **p = pages + pinned; struct amdgpu_ttm_gup_task_list guptask; guptask.task = current; @@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) spin_unlock(>t->guptasklock); r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + write, 0, p, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); @@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) } while (pinned < ttm->num_pages); + return 0; + +release_pages: + release_pages(pages, pinned, 0); + return r; +} + +/* prepare the sg table with the user pages */ +static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned nents; + int r; + + int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, ttm->num_pages << PAGE_SHIFT, GFP_KERNEL); @@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) release_sg: kfree(ttm->sg); - -release_pages: - release_pages(ttm->pages, pinned, 0); return r; } @@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->userflags = flags; spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); + atomic_set(>t->mmu_invalidations, 0); return 0; } @@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, } spin_unlock(>t->guptasklock); + atomic_inc(>t->mmu_invalidations); + return true; } +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int prev_invalidated = *last_invalidated; + + *last_invalidated = atomic_read(>t->mmu_invalidations); + return prev_invalidated != *last_invalidated; +} + bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a0896c761108..cc967b44f104 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -95,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, entry->priority = 0; entry->tv.bo = &vm->page_directory->tbo; entry->tv.shared = true; + entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -1186,6 +1187,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; + entry->user_pages = NULL; vm->page_tables[pt_idx].addr = 0; } -- cgit From 20250215821140801369b84f8cbe79a459a82ba5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 8 Mar 2016 17:58:35 +0100 Subject: drm/amdgpu: Revert "add lock for interval tree in vm" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more because we need to protect the elements on the list anyway. This reverts commit fe237ed7efec8ac147a4572fdf81173a7f8ddda7. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++-------------- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e32ab13227d4..28b4088b2530 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -888,7 +888,6 @@ struct amdgpu_vm_id { struct amdgpu_vm { /* tree of virtual addresses mapped */ - spinlock_t it_lock; struct rb_root va; /* protecting invalidated */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7e73e54eadb9..0e6d0d1f4041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1107,9 +1107,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - spin_lock(&vm->it_lock); it = interval_tree_iter_first(&vm->va, saddr, eaddr); - spin_unlock(&vm->it_lock); if (it) { struct amdgpu_bo_va_mapping *tmp; tmp = container_of(it, struct amdgpu_bo_va_mapping, it); @@ -1136,10 +1134,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mutex_lock(&bo_va->mutex); list_add(&mapping->list, &bo_va->invalids); mutex_unlock(&bo_va->mutex); - spin_lock(&vm->it_lock); interval_tree_insert(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); - trace_amdgpu_vm_bo_map(bo_va, mapping); /* Make sure the page tables are allocated */ saddr >>= amdgpu_vm_block_size; @@ -1191,9 +1186,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, error_free: list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); @@ -1243,9 +1236,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, } mutex_unlock(&bo_va->mutex); list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) @@ -1280,17 +1271,13 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); - spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); - spin_unlock(&vm->it_lock); kfree(mapping); } fence_put(bo_va->last_pt_update); @@ -1347,7 +1334,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); - spin_lock_init(&vm->it_lock); + pd_size = amdgpu_vm_directory_size(adev); pd_entries = amdgpu_vm_num_pdes(adev); @@ -1434,6 +1421,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_vm_id *id = &vm->ids[i]; -- cgit From 32b41ac21fde8f7cea465d74c570fc7bd0089163 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 8 Mar 2016 18:03:27 +0100 Subject: drm/amdgpu: Revert "add mutex for ba_va->valids/invalids" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more because we need to protect the elements on the list anyway. This reverts commit 38bf516c75b4ef0f5c716e05fa9baab7c52d6c39. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 28b4088b2530..9a03d566bf6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -484,7 +484,6 @@ struct amdgpu_bo_va_mapping { /* bo virtual addresses in a specific vm */ struct amdgpu_bo_va { - struct mutex mutex; /* protected by bo being reserved */ struct list_head bo_list; struct fence *last_pt_update; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0e6d0d1f4041..b6c011b83641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1009,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - mutex_lock(&bo_va->mutex); + r = amdgpu_vm_bo_update(adev, bo_va, NULL); - mutex_unlock(&bo_va->mutex); if (r) return r; @@ -1055,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->vm_status); - mutex_init(&bo_va->mutex); + list_add_tail(&bo_va->bo_list, &bo->va); return bo_va; @@ -1131,9 +1130,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mapping->offset = offset; mapping->flags = flags; - mutex_lock(&bo_va->mutex); list_add(&mapping->list, &bo_va->invalids); - mutex_unlock(&bo_va->mutex); interval_tree_insert(&mapping->it, &vm->va); /* Make sure the page tables are allocated */ @@ -1215,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; - mutex_lock(&bo_va->mutex); + list_for_each_entry(mapping, &bo_va->valids, list) { if (mapping->it.start == saddr) break; @@ -1229,12 +1226,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, break; } - if (&mapping->list == &bo_va->invalids) { - mutex_unlock(&bo_va->mutex); + if (&mapping->list == &bo_va->invalids) return -ENOENT; - } } - mutex_unlock(&bo_va->mutex); + list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); @@ -1280,8 +1275,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, interval_tree_remove(&mapping->it, &vm->va); kfree(mapping); } + fence_put(bo_va->last_pt_update); - mutex_destroy(&bo_va->mutex); kfree(bo_va); } -- cgit