diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 267 |
1 files changed, 106 insertions, 161 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f808841310fd..9ddbf1494326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -22,6 +22,7 @@ */ #include <linux/firmware.h> +#include <drm/drm_exec.h> #include "amdgpu_mes.h" #include "amdgpu.h" @@ -38,120 +39,70 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, - unsigned int *doorbell_index) -{ - int r = ida_simple_get(&adev->mes.doorbell_ida, 2, - adev->mes.max_doorbell_slices, - GFP_KERNEL); - if (r > 0) - *doorbell_index = r; - - return r; -} - -void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, - unsigned int doorbell_index) -{ - if (doorbell_index) - ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index); -} - -unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( - struct amdgpu_device *adev, - uint32_t doorbell_index, - unsigned int doorbell_id) -{ - return ((doorbell_index * - amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) + - doorbell_id * 2); -} - -static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev, +static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; + struct amdgpu_mes *mes = &adev->mes; - if (ip_type == AMDGPU_RING_TYPE_SDMA) { + if (ip_type == AMDGPU_RING_TYPE_SDMA) offset = adev->doorbell_index.sdma_engine[0]; - found = find_next_zero_bit(process->doorbell_bitmap, - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, - offset); - } else { - found = find_first_zero_bit(process->doorbell_bitmap, - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS); - } + else + offset = 0; - if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) { + found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); + if (found >= mes->num_mes_dbs) { DRM_WARN("No doorbell available\n"); return -ENOSPC; } - set_bit(found, process->doorbell_bitmap); - - *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev, - process->doorbell_index, found); + set_bit(found, mes->doorbell_bitmap); + /* Get the absolute doorbell index on BAR */ + *doorbell_index = mes->db_start_dw_offset + found * 2; return 0; } -static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev, +static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, struct amdgpu_mes_process *process, uint32_t doorbell_index) { - unsigned int old, doorbell_id; + unsigned int old, rel_index; + struct amdgpu_mes *mes = &adev->mes; - doorbell_id = doorbell_index - - (process->doorbell_index * - amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32); - doorbell_id /= 2; - - old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap); + /* Find the relative index of the doorbell in this object */ + rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; + old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); WARN_ON(!old); } static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { - size_t doorbell_start_offset; - size_t doorbell_aperture_size; - size_t doorbell_process_limit; - size_t aggregated_doorbell_start; int i; + struct amdgpu_mes *mes = &adev->mes; - aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); - aggregated_doorbell_start = - roundup(aggregated_doorbell_start, PAGE_SIZE); - - doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; - doorbell_start_offset = - roundup(doorbell_start_offset, - amdgpu_mes_doorbell_process_slice(adev)); - - doorbell_aperture_size = adev->doorbell.size; - doorbell_aperture_size = - rounddown(doorbell_aperture_size, - amdgpu_mes_doorbell_process_slice(adev)); - - if (doorbell_aperture_size > doorbell_start_offset) - doorbell_process_limit = - (doorbell_aperture_size - doorbell_start_offset) / - amdgpu_mes_doorbell_process_slice(adev); - else - return -ENOSPC; - - adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); - adev->mes.max_doorbell_slices = doorbell_process_limit; + /* Bitmap for dynamic allocation of kernel doorbells */ + mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); + if (!mes->doorbell_bitmap) { + DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); + return -ENOMEM; + } - /* allocate Qword range for aggregated doorbell */ - for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) - adev->mes.aggregated_doorbells[i] = - aggregated_doorbell_start / sizeof(u32) + i * 2; + mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; + for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { + adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; + set_bit(i, mes->doorbell_bitmap); + } - DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit); return 0; } +static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) +{ + bitmap_free(adev->mes.doorbell_bitmap); +} + int amdgpu_mes_init(struct amdgpu_device *adev) { int i, r; @@ -181,7 +132,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < + IP_VERSION(6, 0, 0)) adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; /* zero sdma_hqd_mask for non-existent engine */ else if (adev->sdma.num_instances == 1) @@ -250,6 +202,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -278,15 +231,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, return -ENOMEM; } - process->doorbell_bitmap = - kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); - if (!process->doorbell_bitmap) { - DRM_ERROR("failed to allocate doorbell bitmap\n"); - kfree(process); - return -ENOMEM; - } - /* allocate the process context bo and map it */ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, @@ -313,15 +257,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, goto clean_up_ctx; } - /* allocate the starting doorbell index of the process */ - r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); - if (r < 0) { - DRM_ERROR("failed to allocate doorbell for process\n"); - goto clean_up_pasid; - } - - DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); - INIT_LIST_HEAD(&process->gang_list); process->vm = vm; process->pasid = pasid; @@ -331,15 +266,12 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, amdgpu_mes_unlock(&adev->mes); return 0; -clean_up_pasid: - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); clean_up_ctx: + amdgpu_mes_unlock(&adev->mes); amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); clean_up_memory: - kfree(process->doorbell_bitmap); kfree(process); return r; } @@ -385,7 +317,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) idr_remove(&adev->mes.gang_id_idr, gang->gang_id); } - amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); idr_remove(&adev->mes.pasid_idr, pasid); amdgpu_mes_unlock(&adev->mes); @@ -407,7 +338,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); - kfree(process->doorbell_bitmap); kfree(process); } @@ -627,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -642,6 +584,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, unsigned long flags; int r; + memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); + /* allocate the mes queue buffer */ queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); if (!queue) { @@ -679,7 +623,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_queue_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -737,7 +681,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_queue_doorbell_free(adev, gang->process, + amdgpu_mes_kernel_doorbell_free(adev, gang->process, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); @@ -792,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_queue_doorbell_free(adev, gang->process, + amdgpu_mes_kernel_doorbell_free(adev, gang->process, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); @@ -1062,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; @@ -1168,34 +1116,31 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data) { struct amdgpu_bo_va *bo_va; - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; struct amdgpu_sync sync; + struct drm_exec exec; int r; amdgpu_sync_create(&sync); - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - - csa_tv.bo = &ctx_data->meta_data_obj->tbo; - csa_tv.num_shared = 1; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve meta data BO: err=%d\n", r); - return r; + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, + &ctx_data->meta_data_obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; } bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); if (!bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for meta data BO\n"); - return -ENOMEM; + r = -ENOMEM; + goto error_fini_exec; } r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, @@ -1205,33 +1150,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, if (r) { DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error; + goto error_del_bo_va; } r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) { DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error; + goto error_del_bo_va; } amdgpu_sync_fence(&sync, bo_va->last_pt_update); r = amdgpu_vm_update_pdes(adev, vm, false); if (r) { DRM_ERROR("failed to update pdes on meta data\n"); - goto error; + goto error_del_bo_va; } amdgpu_sync_fence(&sync, vm->last_update); amdgpu_sync_wait(&sync, false); - ttm_eu_backoff_reservation(&ticket, &list); + drm_exec_fini(&exec); amdgpu_sync_free(&sync); ctx_data->meta_data_va = bo_va; return 0; -error: +error_del_bo_va: amdgpu_vm_bo_del(adev, bo_va); - ttm_eu_backoff_reservation(&ticket, &list); + +error_fini_exec: + drm_exec_fini(&exec); amdgpu_sync_free(&sync); return r; } @@ -1242,34 +1189,30 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; struct amdgpu_bo *bo = ctx_data->meta_data_obj; struct amdgpu_vm *vm = bo_va->base.vm; - struct amdgpu_bo_list_entry vm_pd; - struct list_head list, duplicates; - struct dma_fence *fence = NULL; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - long r = 0; - - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); - - tv.bo = &bo->tbo; - tv.num_shared = 2; - list_add(&tv.head, &list); - - amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); - if (r) { - dev_err(adev->dev, "leaking bo va because " - "we fail to reserve bo (%ld)\n", r); - return r; + struct dma_fence *fence; + struct drm_exec exec; + long r; + + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, + &ctx_data->meta_data_obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; } amdgpu_vm_bo_del(adev, bo_va); if (!amdgpu_vm_ready(vm)) goto out_unlock; - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); + r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + &fence); if (r) goto out_unlock; if (fence) { @@ -1288,7 +1231,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, out_unlock: if (unlikely(r < 0)) dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - ttm_eu_backoff_reservation(&ticket, &list); + drm_exec_fini(&exec); return r; } @@ -1409,8 +1352,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(queue_types); i++) { /* On GFX v10.3, fw hasn't supported to map sdma queue. */ - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) && - adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) && + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(10, 3, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(11, 0, 0) && queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) continue; @@ -1471,7 +1416,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", ucode_prefix, pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); |