aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c335
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_api_def.h443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c69
7 files changed, 379 insertions, 574 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 40df1e04d682..5d6b04fc6206 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -367,7 +367,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
/* create MQD for KIQ */
ring = &adev->gfx.kiq.ring;
- if (!ring->mqd_obj) {
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
* otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
* deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
@@ -464,7 +464,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i, r;
+ int i, r = 0;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -479,7 +479,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
- r = amdgpu_ring_test_helper(kiq_ring);
+
+ if (adev->gfx.kiq.ring.sched.ready)
+ r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5be30bf68b0c..72bafba1c470 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -150,7 +150,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- mutex_init(&adev->mes.mutex);
+ mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
@@ -166,8 +166,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
- for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ else
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.agreegated_doorbells[i] = 0xffffffff;
@@ -207,7 +211,7 @@ error_ids:
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
return r;
}
@@ -219,7 +223,14 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
}
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
@@ -228,13 +239,10 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_process *process;
int r;
- mutex_lock(&adev->mes.mutex);
-
/* allocate the mes process buffer */
process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
if (!process) {
DRM_ERROR("no more memory to create mes process\n");
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
@@ -244,18 +252,9 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
if (!process->doorbell_bitmap) {
DRM_ERROR("failed to allocate doorbell bitmap\n");
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_memory;
- }
-
/* allocate the process context bo and map it */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
@@ -264,15 +263,29 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
&process->proc_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_pasid;
+ goto clean_up_memory;
}
memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* add the mes process to idr list */
+ r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to lock pasid=%d\n", pasid);
+ goto clean_up_ctx;
+ }
+
/* allocate the starting doorbell index of the process */
r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
if (r < 0) {
DRM_ERROR("failed to allocate doorbell for process\n");
- goto clean_up_ctx;
+ goto clean_up_pasid;
}
DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
@@ -283,19 +296,19 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
process->process_quantum = adev->mes.default_process_quantum;
process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
+clean_up_pasid:
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
clean_up_ctx:
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
-clean_up_pasid:
- idr_remove(&adev->mes.pasid_idr, pasid);
clean_up_memory:
kfree(process->doorbell_bitmap);
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -308,18 +321,21 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
process = idr_find(&adev->mes.pasid_idr, pasid);
if (!process) {
DRM_WARN("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return;
}
- /* free all gangs in the process */
+ /* Remove all queues from hardware */
list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
@@ -332,29 +348,35 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
&queue_input);
if (r)
DRM_WARN("failed to remove hardware queue\n");
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ }
+ amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
+
+ /* free all memory allocated by the process */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ /* free all queues in the gang */
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
kfree(queue);
}
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
list_del(&gang->list);
kfree(gang);
- }
- amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
-
- idr_remove(&adev->mes.pasid_idr, pasid);
+ }
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
kfree(process->doorbell_bitmap);
kfree(process);
-
- mutex_unlock(&adev->mes.mutex);
}
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
@@ -365,34 +387,12 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_gang *gang;
int r;
- mutex_lock(&adev->mes.mutex);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes gang buffer */
gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
if (!gang) {
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- kfree(gang);
- mutex_unlock(&adev->mes.mutex);
- return r;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
/* allocate the gang context bo and map it to cpu space */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
@@ -401,10 +401,34 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
&gang->gang_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up;
+ goto clean_up_mem;
}
memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_ERROR("pasid %d doesn't exist\n", pasid);
+ r = -EINVAL;
+ goto clean_up_ctx;
+ }
+
+ /* add the mes gang to idr list */
+ r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to allocate idr for gang\n");
+ goto clean_up_ctx;
+ }
+
+ gang->gang_id = r;
+ *gang_id = r;
+
INIT_LIST_HEAD(&gang->queue_list);
gang->process = process;
gang->priority = gprops->priority;
@@ -414,13 +438,16 @@ int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
list_add_tail(&gang->list, &process->gang_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up:
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -428,29 +455,35 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
{
struct amdgpu_mes_gang *gang;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
if (!list_empty(&gang->queue_list)) {
DRM_ERROR("queue list is not empty\n");
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EBUSY;
}
idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ list_del(&gang->list);
+ amdgpu_mes_unlock(&adev->mes);
+
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
+
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
@@ -462,7 +495,11 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
struct mes_suspend_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
@@ -475,7 +512,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev)
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
@@ -487,7 +524,11 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
struct mes_resume_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
@@ -500,17 +541,16 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
-static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
struct amdgpu_mes_queue *q,
struct amdgpu_mes_queue_properties *p)
{
struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
u32 mqd_size = mqd_mgr->mqd_size;
- struct amdgpu_mqd_prop mqd_prop = {0};
int r;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
@@ -523,6 +563,26 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
}
memset(q->mqd_cpu_ptr, 0, mqd_size);
+ r = amdgpu_bo_reserve(q->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto clean_up;
+
+ return 0;
+
+clean_up:
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+ return r;
+}
+
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+
mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
@@ -535,27 +595,9 @@ static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
-
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
amdgpu_bo_unreserve(q->mqd_obj);
- return 0;
-
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
- return r;
-}
-
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
}
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
@@ -568,29 +610,38 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes queue buffer */
queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
if (!queue) {
- mutex_unlock(&adev->mes.mutex);
+ DRM_ERROR("Failed to allocate memory for queue\n");
return -ENOMEM;
}
+ /* Allocate the queue mqd */
+ r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+ if (r)
+ goto clean_up_memory;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ r = -EINVAL;
+ goto clean_up_mqd;
+ }
+
/* add the mes gang to idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
GFP_ATOMIC);
if (r < 0) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_memory;
+ goto clean_up_mqd;
}
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
*queue_id = queue->queue_id = r;
@@ -603,13 +654,15 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
goto clean_up_queue_id;
/* initialize the queue mqd */
- r = amdgpu_mes_queue_init_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_doorbell;
+ amdgpu_mes_queue_init_mqd(adev, queue, qprops);
/* add hw queue to mes */
queue_input.process_id = gang->process->pasid;
- queue_input.page_table_base_addr = gang->process->pd_gpu_addr;
+
+ queue_input.page_table_base_addr =
+ adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+ adev->gmc.vram_start;
+
queue_input.process_va_start = 0;
queue_input.process_va_end =
(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
@@ -629,7 +682,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
if (r) {
DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
qprops->doorbell_off);
- goto clean_up_mqd;
+ goto clean_up_doorbell;
}
DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
@@ -645,11 +698,9 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue->gang = gang;
list_add_tail(&queue->list, &gang->queue_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up_mqd:
- amdgpu_mes_queue_free_mqd(queue);
clean_up_doorbell:
amdgpu_mes_queue_doorbell_free(adev, gang->process,
qprops->doorbell_off);
@@ -657,9 +708,11 @@ clean_up_queue_id:
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return r;
}
@@ -671,7 +724,11 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
struct mes_remove_queue_input queue_input;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
/* remove the mes gang from idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
@@ -679,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
queue = idr_find(&adev->mes.queue_id_idr, queue_id);
if (!queue) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
DRM_ERROR("queue id %d doesn't exist\n", queue_id);
return -EINVAL;
}
@@ -699,15 +756,42 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
queue_id);
- amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
amdgpu_mes_queue_doorbell_free(adev, gang->process,
queue->doorbell_off);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_mes_queue_free_mqd(queue);
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to unmap legacy queue\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
@@ -771,18 +855,22 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
struct amdgpu_mes_queue_properties qprops = {0};
int r, queue_id, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
pasid = gang->process->pasid;
ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
if (!ring) {
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -ENOMEM;
}
@@ -823,7 +911,7 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
dma_fence_wait(gang->process->vm->last_update, false);
dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
if (r)
@@ -850,7 +938,7 @@ clean_up_ring:
amdgpu_ring_fini(ring);
clean_up_memory:
kfree(ring);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return r;
}
@@ -1086,9 +1174,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
}
for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On sienna cichlid+, fw hasn't supported to map sdma queue. */
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- i == AMDGPU_RING_TYPE_SDMA)
+ /* On GFX v10.3, the firmware does not support mapping SDMA queues. */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+ queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
continue;
r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 548015bb6ee7..25590b301f25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -56,7 +56,7 @@ enum admgpu_mes_pipe {
struct amdgpu_mes {
struct amdgpu_device *adev;
- struct mutex mutex;
+ struct mutex mutex_hidden;
struct idr pasid_idr;
struct idr gang_id_idr;
@@ -109,9 +109,11 @@ struct amdgpu_mes {
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
+ uint32_t saved_flags;
/* initialize kiq pipe */
int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
@@ -198,6 +200,10 @@ struct mes_add_queue_input {
uint64_t wptr_addr;
uint32_t queue_type;
uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
struct mes_remove_queue_input {
@@ -205,6 +211,16 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
struct mes_suspend_gang_input {
bool suspend_all_gangs;
uint64_t gang_context_addr;
@@ -224,6 +240,9 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
@@ -232,6 +251,7 @@ struct amdgpu_mes_funcs {
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
@@ -255,6 +275,11 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
@@ -280,4 +305,62 @@ unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
uint32_t doorbell_index,
unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why to set no-FS reclaim with MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_nofs_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems; we
+ * have no control over them or their lock dependencies. So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9042e0b480ce..3c4f2a94ad9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3551,8 +3551,14 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+ if (!adev->gfx.kiq.ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index b80b5f70ecf1..61db2a378008 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -274,7 +274,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if (adev->gfx.kiq.ring.sched.ready &&
+ if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -411,6 +411,10 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
+ /* MES fw manages IH_VMID_x_LUT updating */
+ if (ring->is_mes_queue)
+ return;
+
if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
@@ -803,6 +807,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
}
amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
r = adev->mmhub.funcs->gart_enable(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
deleted file mode 100644
index 3f4fca5fd1da..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __MES_API_DEF_H__
-#define __MES_API_DEF_H__
-
-#pragma pack(push, 4)
-
-#define MES_API_VERSION 1
-
-/* Driver submits one API(cmd) as a single Frame and this command size is same
- * for all API to ease the debugging and parsing of ring buffer.
- */
-enum { API_FRAME_SIZE_IN_DWORDS = 64 };
-
-/* To avoid command in scheduler context to be overwritten whenenver mutilple
- * interrupts come in, this creates another queue.
- */
-enum { API_NUMBER_OF_COMMAND_MAX = 32 };
-
-enum MES_API_TYPE {
- MES_API_TYPE_SCHEDULER = 1,
- MES_API_TYPE_MAX
-};
-
-enum MES_SCH_API_OPCODE {
- MES_SCH_API_SET_HW_RSRC = 0,
- MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* agreegated db, quantums, etc */
- MES_SCH_API_ADD_QUEUE = 2,
- MES_SCH_API_REMOVE_QUEUE = 3,
- MES_SCH_API_PERFORM_YIELD = 4,
- MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5,
- MES_SCH_API_SUSPEND = 6,
- MES_SCH_API_RESUME = 7,
- MES_SCH_API_RESET = 8,
- MES_SCH_API_SET_LOG_BUFFER = 9,
- MES_SCH_API_CHANGE_GANG_PRORITY = 10,
- MES_SCH_API_QUERY_SCHEDULER_STATUS = 11,
- MES_SCH_API_PROGRAM_GDS = 12,
- MES_SCH_API_SET_DEBUG_VMID = 13,
- MES_SCH_API_MISC = 14,
- MES_SCH_API_MAX = 0xFF
-};
-
-union MES_API_HEADER {
- struct {
- uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */
- uint32_t opcode : 8;
- uint32_t dwsize : 8; /* including header */
- uint32_t reserved : 12;
- };
-
- uint32_t u32All;
-};
-
-enum MES_AMD_PRIORITY_LEVEL {
- AMD_PRIORITY_LEVEL_LOW = 0,
- AMD_PRIORITY_LEVEL_NORMAL = 1,
- AMD_PRIORITY_LEVEL_MEDIUM = 2,
- AMD_PRIORITY_LEVEL_HIGH = 3,
- AMD_PRIORITY_LEVEL_REALTIME = 4,
- AMD_PRIORITY_NUM_LEVELS
-};
-
-enum MES_QUEUE_TYPE {
- MES_QUEUE_TYPE_GFX,
- MES_QUEUE_TYPE_COMPUTE,
- MES_QUEUE_TYPE_SDMA,
- MES_QUEUE_TYPE_MAX,
-};
-
-struct MES_API_STATUS {
- uint64_t api_completion_fence_addr;
- uint64_t api_completion_fence_value;
-};
-
-enum { MAX_COMPUTE_PIPES = 8 };
-enum { MAX_GFX_PIPES = 2 };
-enum { MAX_SDMA_PIPES = 2 };
-
-enum { MAX_COMPUTE_HQD_PER_PIPE = 8 };
-enum { MAX_GFX_HQD_PER_PIPE = 8 };
-enum { MAX_SDMA_HQD_PER_PIPE = 10 };
-
-enum { MAX_QUEUES_IN_A_GANG = 8 };
-
-enum VM_HUB_TYPE {
- VM_HUB_TYPE_GC = 0,
- VM_HUB_TYPE_MM = 1,
- VM_HUB_TYPE_MAX,
-};
-
-enum { VMID_INVALID = 0xffff };
-
-enum { MAX_VMID_GCHUB = 16 };
-enum { MAX_VMID_MMHUB = 16 };
-
-enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
-};
-
-enum MES_LOG_CONTEXT_STATE {
- MES_LOG_CONTEXT_STATE_IDLE = 0,
- MES_LOG_CONTEXT_STATE_RUNNING = 1,
- MES_LOG_CONTEXT_STATE_READY = 2,
- MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
-};
-
-struct MES_LOG_CONTEXT_STATE_CHANGE {
- void *h_context;
- enum MES_LOG_CONTEXT_STATE new_context_state;
-};
-
-struct MES_LOG_ENTRY_HEADER {
- uint32_t first_free_entry_index;
- uint32_t wraparound_count;
- uint64_t number_of_entries;
- uint64_t reserved[2];
-};
-
-struct MES_LOG_ENTRY_DATA {
- uint64_t gpu_time_stamp;
- uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
- uint32_t reserved_operation_type_bits;
- union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
- };
-};
-
-struct MES_LOG_BUFFER {
- struct MES_LOG_ENTRY_HEADER header;
- struct MES_LOG_ENTRY_DATA entries[1];
-};
-
-union MESAPI_SET_HW_RESOURCES {
- struct {
- union MES_API_HEADER header;
- uint32_t vmid_mask_mmhub;
- uint32_t vmid_mask_gfxhub;
- uint32_t gds_size;
- uint32_t paging_vmid;
- uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
- uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
- uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
- uint64_t g_sch_ctx_gpu_mc_ptr;
- uint64_t query_status_fence_gpu_mc_ptr;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t disable_reset : 1;
- uint32_t reserved : 31;
- };
- uint32_t uint32_t_all;
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__ADD_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t process_id;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint64_t process_quantum;
- uint64_t process_context_addr;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint32_t doorbell_offset;
- uint64_t mqd_addr;
- uint64_t wptr_addr;
- enum MES_QUEUE_TYPE queue_type;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
-
- struct {
- uint32_t paging : 1;
- uint32_t debug_vmid : 4;
- uint32_t program_gds : 1;
- uint32_t is_gang_suspended : 1;
- uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__REMOVE_QUEUE {
- struct {
- union MES_API_HEADER header;
- uint32_t doorbell_offset;
- uint64_t gang_context_addr;
-
- struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
- };
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_SCHEDULING_CONFIG {
- struct {
- union MES_API_HEADER header;
- /* Grace period when preempting another priority band for this
- * priority band. The value for idle priority band is ignored,
- * as it never preempts other bands.
- */
- uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS];
- /* Default quantum for scheduling across processes within
- * a priority band.
- */
- uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS];
- /* Default grace period for processes that preempt each other
- * within a priority band.
- */
- uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS];
- /* For normal level this field specifies the target GPU
- * percentage in situations when it's starved by the high level.
- * Valid values are between 0 and 50, with the default being 10.
- */
- uint32_t normal_yield_percent;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PERFORM_YIELD {
- struct {
- union MES_API_HEADER header;
- uint32_t dummy;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__CHANGE_GANG_PRIORITY_LEVEL {
- struct {
- union MES_API_HEADER header;
- uint32_t inprocess_gang_priority;
- enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
- uint64_t gang_quantum;
- uint64_t gang_context_addr;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SUSPEND {
- struct {
- union MES_API_HEADER header;
- /* false - suspend all gangs; true - specific gang */
- struct {
- uint32_t suspend_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* gang_context_addr is valid only if suspend_all = false */
- uint64_t gang_context_addr;
-
- uint64_t suspend_fence_addr;
- uint32_t suspend_fence_value;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESUME {
- struct {
- union MES_API_HEADER header;
- /* false - resume all gangs; true - specified gang */
- struct {
- uint32_t resume_all_gangs : 1;
- uint32_t reserved : 31;
- };
- /* valid only if resume_all_gangs = false */
- uint64_t gang_context_addr;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__RESET {
- struct {
- union MES_API_HEADER header;
-
- struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
- };
-
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_LOGGING_BUFFER {
- struct {
- union MES_API_HEADER header;
- /* There are separate log buffers for each queue type */
- enum MES_QUEUE_TYPE log_type;
- /* Log buffer GPU Address */
- uint64_t logging_buffer_addr;
- /* number of entries in the log buffer */
- uint32_t number_of_entries;
- /* Entry index at which CPU interrupt needs to be signalled */
- uint32_t interrupt_entry;
-
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__QUERY_MES_STATUS {
- struct {
- union MES_API_HEADER header;
- bool mes_healthy; /* 0 - not healthy, 1 - healthy */
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__PROGRAM_GDS {
- struct {
- union MES_API_HEADER header;
- uint64_t process_context_addr;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- struct MES_API_STATUS api_status;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-union MESAPI__SET_DEBUG_VMID {
- struct {
- union MES_API_HEADER header;
- struct MES_API_STATUS api_status;
- union {
- struct {
- uint32_t use_gds : 1;
- uint32_t reserved : 31;
- } flags;
- uint32_t u32All;
- };
- uint32_t reserved;
- uint32_t debug_vmid;
- uint64_t process_context_addr;
- uint64_t page_table_base_addr;
- uint64_t process_va_start;
- uint64_t process_va_end;
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_mask;
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-enum MESAPI_MISC_OPCODE {
- MESAPI_MISC__MODIFY_REG,
- MESAPI_MISC__MAX,
-};
-
-enum MODIFY_REG_SUBCODE {
- MODIFY_REG__OVERWRITE,
- MODIFY_REG__RMW_OR,
- MODIFY_REG__RMW_AND,
- MODIFY_REG__MAX,
-};
-
-enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
-
-union MESAPI__MISC {
- struct {
- union MES_API_HEADER header;
- enum MESAPI_MISC_OPCODE opcode;
- struct MES_API_STATUS api_status;
-
- union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
- uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
- };
- };
-
- uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
-};
-
-#pragma pack(pop)
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 622aa17b18e7..030a92b3a0da 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -133,6 +133,8 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
@@ -141,8 +143,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
mes_add_queue_pkt.process_va_start = input->process_va_start;
mes_add_queue_pkt.process_va_end = input->process_va_end;
mes_add_queue_pkt.process_quantum = input->process_quantum;
@@ -159,6 +160,10 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
@@ -192,6 +197,44 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ if (input->queue_type == AMDGPU_RING_TYPE_GFX)
+ mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
+ else
+ mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ }
+
+ mes_remove_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_remove_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+}
+
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input)
{
@@ -254,9 +297,21 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
@@ -269,6 +324,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
.suspend_gang = mes_v10_1_suspend_gang,
.resume_gang = mes_v10_1_resume_gang,
};
@@ -1097,6 +1153,13 @@ static int mes_v10_1_hw_init(void *handle)
goto failure;
}
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq.ring.sched.ready = false;
+
return 0;
failure: