aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorMaxime Ripard <maxime@cerno.tech>2022-10-18 15:00:03 +0200
committerMaxime Ripard <maxime@cerno.tech>2022-10-18 15:00:03 +0200
commita140a6a2d5ec0329ad05cd3532a91ad0ce58dceb (patch)
treeb2d44a1da423c53bd6c3ab3facd45ff5f2087ffd /drivers/gpu/drm/amd/amdgpu
parent28743e25fa1c867675bd8ff976eb92d4251f13a1 (diff)
parent9abf2313adc1ca1b6180c508c25f22f9395cc780 (diff)
Merge drm/drm-next into drm-misc-next
Let's kick-off this release cycle. Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1216
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c140
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c272
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c156
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c168
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c282
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c310
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c129
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c4
75 files changed, 2279 insertions, 2097 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 79bb6fd83094..ae9371b172e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -885,6 +885,7 @@ struct amdgpu_device {
u64 fence_context;
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct dma_fence __rcu *gang_submit;
bool ib_pool_ready;
struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
@@ -1294,6 +1295,8 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
u32 reg);
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
u32 reg, u32 v);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 55402d238919..b14800ac179e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
@@ -849,6 +850,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
if (amdgpu_device_has_dc_support(adev)) {
#if defined(CONFIG_DRM_AMD_DC)
struct amdgpu_display_manager *dm = &adev->dm;
+
if (dm->backlight_dev[0])
atif->bd = dm->backlight_dev[0];
#endif
@@ -863,6 +865,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
enc->enc_priv) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
if (dig->bl_dev) {
atif->bd = dig->bl_dev;
break;
@@ -919,9 +922,9 @@ static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev)
return false;
status = acpi_get_handle(dhandle, "ATIF", &atif_handle);
- if (ACPI_FAILURE(status)) {
+ if (ACPI_FAILURE(status))
return false;
- }
+
amdgpu_acpi_priv.atif.handle = atif_handle;
acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer);
DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
@@ -954,9 +957,9 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
return false;
status = acpi_get_handle(dhandle, "ATCS", &atcs_handle);
- if (ACPI_FAILURE(status)) {
+ if (ACPI_FAILURE(status))
return false;
- }
+
amdgpu_acpi_priv.atcs.handle = atcs_handle;
acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer);
DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
@@ -1050,6 +1053,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 091415a4abf0..03bbfaa51cbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
@@ -74,9 +75,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
return;
adev->kfd.dev = kgd2kfd_probe(adev, vf);
-
- if (adev->kfd.dev)
- amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}
/**
@@ -130,6 +128,7 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
kfd.reset_work);
struct amdgpu_reset_context reset_context;
+
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
@@ -199,6 +198,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
adev_to_drm(adev), &gpu_resources);
+ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@@ -208,6 +209,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
if (adev->kfd.dev) {
kgd2kfd_device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
+ amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
}
}
@@ -684,6 +686,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
ib->length_dw = ib_len;
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
+ job->num_ibs = 1;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
@@ -753,11 +756,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
{
struct ras_err_data err_data = {0, 0, 0, NULL};
- /* CPU MCA will handle page retirement if connected_to_cpu is 1 */
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, &err_data, reset);
- else if (reset)
- amdgpu_amdkfd_gpu_reset(adev);
+ amdgpu_umc_poison_handler(adev, &err_data, reset);
}
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2170db83e41d..978d3970b5cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014-2018 Advanced Micro Devices, Inc.
*
@@ -297,7 +298,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
*/
replacement = dma_fence_get_stub();
dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
- replacement, DMA_RESV_USAGE_READ);
+ replacement, DMA_RESV_USAGE_BOOKKEEP);
dma_fence_put(replacement);
return 0;
}
@@ -1390,8 +1391,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(vm->root.bo,
- &vm->process_info->eviction_fence->base, true);
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv,
+ &vm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(vm->root.bo);
/* Update process info */
@@ -1612,6 +1614,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t available;
+
spin_lock(&kfd_mem_limit.mem_limit_lock);
available = adev->gmc.real_vram_size
- adev->kfd.vram_used_aligned
@@ -1987,9 +1990,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
}
if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- amdgpu_bo_fence(bo,
- &avm->process_info->eviction_fence->base,
- true);
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &avm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
@@ -2216,7 +2219,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
{
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb();
+ mb(); /* make sure read happened */
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
@@ -2758,15 +2761,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
if (mem->bo->tbo.pin_count)
continue;
- amdgpu_bo_fence(mem->bo,
- &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(mem->bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
}
/* Attach eviction fence to PD / PT BOs */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.bo;
- amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
}
validate_map_fail:
@@ -2820,7 +2826,9 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
- amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ dma_resv_add_fence(gws_bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(gws_bo);
mutex_unlock(&(*mem)->process_info->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index b7933c2ce765..491d4846fc02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1674,10 +1674,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
adev->mode_info.dither_property,
AMDGPU_FMT_DITHER_DISABLE);
- if (amdgpu_audio != 0)
+ if (amdgpu_audio != 0) {
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
subpixel_order = SubPixelHorizontalRGB;
connector->interlace_allowed = true;
@@ -1799,6 +1801,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1852,6 +1855,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
@@ -1902,6 +1906,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.audio_property,
AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
}
drm_object_attach_property(&amdgpu_connector->base.base,
adev->mode_info.dither_property,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b7bae833c804..1bbd39b3b0fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -39,9 +39,82 @@
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
-static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_cs_chunk_fence *data,
- uint32_t *offset)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+ struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ if (cs->in.num_chunks == 0)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->adev = adev;
+ p->filp = filp;
+
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (atomic_read(&p->ctx->guilty)) {
+ amdgpu_ctx_put(p->ctx);
+ return -ECANCELED;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
+
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+ unsigned int *num_ibs)
+{
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ ++(num_ibs[r]);
+ return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
{
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
@@ -80,11 +153,11 @@ error_unref:
return r;
}
-static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_bo_list_in *data)
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
{
+ struct drm_amdgpu_bo_list_entry *info;
int r;
- struct drm_amdgpu_bo_list_entry *info = NULL;
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
@@ -104,38 +177,25 @@ error_free:
return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
- unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
- int i;
+ unsigned int size;
int ret;
+ int i;
- if (cs->in.num_chunks == 0)
- return -EINVAL;
-
- chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
+ GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
- p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
- if (!p->ctx) {
- ret = -EINVAL;
- goto free_chunk;
- }
-
- mutex_lock(&p->ctx->lock);
-
- /* skip guilty context job */
- if (atomic_read(&p->ctx->guilty) == 1) {
- ret = -ECANCELED;
- goto free_chunk;
- }
-
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
@@ -170,7 +230,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
size = p->chunks[i].length_dw;
cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
+ p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
+ GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
ret = -ENOMEM;
i--;
@@ -182,36 +243,35 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_partial_kdata;
}
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
- ++num_ibs;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_FENCE:
- size = sizeof(struct drm_amdgpu_cs_chunk_fence);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
- &uf_offset);
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_BO_HANDLES:
- size = sizeof(struct drm_amdgpu_bo_list_in);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -223,22 +283,32 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
break;
default:
- ret = -EINVAL;
goto free_partial_kdata;
}
}
- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
- if (ret)
- goto free_all_kdata;
+ if (!p->gang_size)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
+ if (ret)
+ goto free_all_kdata;
+
+ ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
+ &fpriv->vm);
+ if (ret)
+ goto free_all_kdata;
+ }
+ p->gang_leader = p->jobs[p->gang_size - 1];
- if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+ if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
ret = -ECANCELED;
goto free_all_kdata;
}
if (p->uf_entry.tv.bo)
- p->job->uf_addr = uf_offset;
+ p->gang_leader->uf_addr = uf_offset;
kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
@@ -260,6 +330,297 @@ free_chunk:
return ret;
}
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk,
+ unsigned int *ce_preempt,
+ unsigned int *de_preempt)
+{
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_ring *ring;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ job = p->jobs[r];
+ ring = amdgpu_job_ring(job);
+ ib = &job->ibs[job->num_ibs++];
+
+ /* MM engine doesn't support user fences */
+ if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)
+ return -EINVAL;
+
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ (*ce_preempt)++;
+ else
+ (*de_preempt)++;
+
+ /* Each GFX command submit allows only 1 IB max
+ * preemptible for CE & DE */
+ if (*ce_preempt > 1 || *de_preempt > 1)
+ return -EINVAL;
+ }
+
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+
+ r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+ return 0;
+}
+
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
+ struct dma_fence *fence;
+
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+ amdgpu_ctx_put(ctx);
+
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+ struct drm_sched_fence *s_fence;
+ struct dma_fence *old = fence;
+
+ s_fence = to_drm_sched_fence(fence);
+ fence = dma_fence_get(&s_fence->scheduled);
+ dma_fence_put(old);
+ }
+
+ r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
+{
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ if (r) {
+ DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
+ return r;
+ }
+
+ r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
+ dma_fence_put(fence);
+
+ return r;
+}
+
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_deps[i].syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_deps[i].syncobj)
+ return -EINVAL;
+ p->post_deps[i].chain = NULL;
+ p->post_deps[i].point = 0;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+ dep->chain = NULL;
+ if (syncobj_deps[i].point) {
+ dep->chain = dma_fence_chain_alloc();
+ if (!dep->chain)
+ return -ENOMEM;
+ }
+
+ dep->syncobj = drm_syncobj_find(p->filp,
+ syncobj_deps[i].handle);
+ if (!dep->syncobj) {
+ dma_fence_chain_free(dep->chain);
+ return -EINVAL;
+ }
+ dep->point = syncobj_deps[i].point;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
+{
+ unsigned int ce_preempt = 0, de_preempt = 0;
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ r = amdgpu_cs_p2_dependencies(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_p2_syncobj_in(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_p2_syncobj_out(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
+ if (r)
+ return r;
+ break;
+ }
+ }
+
+ return 0;
+}
+
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
@@ -495,9 +856,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
- struct amdgpu_bo *gds;
- struct amdgpu_bo *gws;
- struct amdgpu_bo *oa;
+ unsigned int i;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -581,16 +940,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->bo_va = amdgpu_vm_bo_find(vm, bo);
}
- /* Move fence waiting after getting reservation lock of
- * PD root. Then there is no need on a ctx mutex lock.
- */
- r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
- goto error_validate;
- }
-
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -611,197 +960,139 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (r)
goto error_validate;
- amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
- p->bytes_moved_vis);
+ if (p->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
+ r = amdgpu_ttm_alloc_gart(&uf->tbo);
+ if (r)
+ goto error_validate;
- if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
- p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
- }
- if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
- p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
- }
- if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
- p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf);
}
- if (!r && p->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
- r = amdgpu_ttm_alloc_gart(&uf->tbo);
- p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
- }
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
error_validate:
- if (r)
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
out_free_user_pages:
- if (r) {
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (!e->user_pages)
- continue;
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
- kvfree(e->user_pages);
- e->user_pages = NULL;
- }
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (!e->user_pages)
+ continue;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
}
return r;
}
-static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list_entry *e;
- int r;
+ int i, j;
- list_for_each_entry(e, &p->validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- struct dma_resv *resv = bo->tbo.base.resv;
- enum amdgpu_sync_mode sync_mode;
+ if (!trace_amdgpu_cs_enabled())
+ return;
- sync_mode = amdgpu_bo_explicit_sync(bo) ?
- AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
- &fpriv->vm);
- if (r)
- return r;
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
+
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
}
- return 0;
}
-/**
- * amdgpu_cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- * @backoff: indicator to backoff the reservation
- *
- * If error is set then unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
- unsigned i;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
+ int r;
- if (error && backoff) {
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
- mutex_unlock(&parser->bo_list->bo_list_mutex);
- }
+ /* Only for UVD/VCE VM emulation */
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
- for (i = 0; i < parser->num_post_deps; i++) {
- drm_syncobj_put(parser->post_deps[i].syncobj);
- kfree(parser->post_deps[i].chain);
- }
- kfree(parser->post_deps);
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
- dma_fence_put(parser->fence);
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ DRM_ERROR("IB va_start is invalid\n");
+ return r;
+ }
- if (parser->ctx) {
- mutex_unlock(&parser->ctx->lock);
- amdgpu_ctx_put(parser->ctx);
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
+
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r) {
+ return r;
+ }
+
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
+
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
}
- if (parser->bo_list)
- amdgpu_bo_list_put(parser->bo_list);
- for (i = 0; i < parser->nchunks; i++)
- kvfree(parser->chunks[i].kdata);
- kvfree(parser->chunks);
- if (parser->job)
- amdgpu_job_free(parser->job);
- if (parser->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+ return 0;
+}
- amdgpu_bo_unref(&uf);
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
}
+ return 0;
}
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
+ unsigned int i;
int r;
- /* Only for UVD/VCE VM emulation */
- if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
- unsigned i, j;
-
- for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- struct amdgpu_cs_chunk *chunk;
- uint64_t offset, va_start;
- struct amdgpu_ib *ib;
- uint8_t *kptr;
-
- chunk = &p->chunks[i];
- ib = &p->job->ibs[j];
- chunk_ib = chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
-
- if ((va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
- return -EINVAL;
- }
-
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
-
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += va_start - offset;
-
- if (ring->funcs->parse_cs) {
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
- if (r)
- return r;
- } else {
- ib->ptr = (uint32_t *)kptr;
- r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
- amdgpu_bo_kunmap(aobj);
- if (r)
- return r;
- }
-
- j++;
- }
- }
-
- if (!p->job->vm)
- return amdgpu_cs_sync_rings(p);
-
-
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
@@ -810,18 +1101,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (fpriv->csa_va) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -840,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -853,11 +1144,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&job->sync, vm->last_update);
if (r)
return r;
- p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
+
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
@@ -872,331 +1170,40 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
}
}
- return amdgpu_cs_sync_rings(p);
-}
-
-static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- int r, ce_preempt = 0, de_preempt = 0;
- struct amdgpu_ring *ring;
- int i, j;
-
- for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
- struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct drm_sched_entity *entity;
-
- chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
- (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
- ce_preempt++;
- else
- de_preempt++;
- }
-
- /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
- if (ce_preempt > 1 || de_preempt > 1)
- return -EINVAL;
- }
-
- r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &entity);
- if (r)
- return r;
-
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
- parser->job->preamble_status |=
- AMDGPU_PREAMBLE_IB_PRESENT;
-
- if (parser->entity && parser->entity != entity)
- return -EINVAL;
-
- /* Return if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP*/
- if (entity->rq == NULL)
- return -EINVAL;
-
- parser->entity = entity;
-
- ring = to_amdgpu_ring(entity->rq->sched);
- r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0,
- AMDGPU_IB_POOL_DELAYED, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
-
- j++;
- }
-
- /* MM engine doesn't support user fences */
- ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && ring->funcs->no_user_fence)
- return -EINVAL;
-
return 0;
}
-static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned num_deps;
- int i, r;
- struct drm_amdgpu_cs_chunk_dep *deps;
-
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_dep);
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ctx *ctx;
- struct drm_sched_entity *entity;
- struct dma_fence *fence;
-
- ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
- if (ctx == NULL)
- return -EINVAL;
-
- r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &entity);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
-
- fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
- amdgpu_ctx_put(ctx);
-
- if (IS_ERR(fence))
- return PTR_ERR(fence);
- else if (!fence)
- continue;
-
- if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
- struct drm_sched_fence *s_fence;
- struct dma_fence *old = fence;
-
- s_fence = to_drm_sched_fence(fence);
- fence = dma_fence_get(&s_fence->scheduled);
- dma_fence_put(old);
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
- if (r)
- return r;
- }
- return 0;
-}
-
-static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle, u64 point,
- u64 flags)
-{
- struct dma_fence *fence;
+ struct amdgpu_job *leader = p->gang_leader;
+ struct amdgpu_bo_list_entry *e;
+ unsigned int i;
int r;
- r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
- if (r) {
- DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
- handle, point, r);
- return r;
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
-
- return r;
-}
-
-static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i, r;
+ list_for_each_entry(e, &p->validated, tv.head) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
- 0, 0);
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
+ &fpriv->vm);
if (r)
return r;
}
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i, r;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p,
- syncobj_deps[i].handle,
- syncobj_deps[i].point,
- syncobj_deps[i].flags);
+ for (i = 0; i < p->gang_size - 1; ++i) {
+ r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
if (r)
return r;
}
- return 0;
-}
-
-static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i;
-
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
-
- for (i = 0; i < num_deps; ++i) {
- p->post_deps[i].syncobj =
- drm_syncobj_find(p->filp, deps[i].handle);
- if (!p->post_deps[i].syncobj)
- return -EINVAL;
- p->post_deps[i].chain = NULL;
- p->post_deps[i].point = 0;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
-
- dep->chain = NULL;
- if (syncobj_deps[i].point) {
- dep->chain = dma_fence_chain_alloc();
- if (!dep->chain)
- return -ENOMEM;
- }
-
- dep->syncobj = drm_syncobj_find(p->filp,
- syncobj_deps[i].handle);
- if (!dep->syncobj) {
- dma_fence_chain_free(dep->chain);
- return -EINVAL;
- }
- dep->point = syncobj_deps[i].point;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
-{
- int i, r;
-
- /* TODO: Investigate why we still need the context lock */
- mutex_unlock(&p->ctx->lock);
-
- for (i = 0; i < p->nchunks; ++i) {
- struct amdgpu_cs_chunk *chunk;
-
- chunk = &p->chunks[i];
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+ if (r && r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
- switch (chunk->chunk_id) {
- case AMDGPU_CHUNK_ID_DEPENDENCIES:
- case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
- r = amdgpu_cs_process_fence_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
- r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
- if (r)
- goto out;
- break;
- }
- }
-
-out:
- mutex_lock(&p->ctx->lock);
return r;
}
@@ -1221,20 +1228,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct drm_sched_entity *entity = p->entity;
+ struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
- struct amdgpu_job *job;
+ unsigned int i;
uint64_t seq;
int r;
- job = p->job;
- p->job = NULL;
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
- r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
- if (r)
- goto error_unlock;
+ for (i = 0; i < (p->gang_size - 1); ++i) {
+ struct dma_fence *fence;
- drm_sched_job_arm(&job->base);
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ r = amdgpu_sync_fence(&leader->sync, fence);
+ if (r)
+ goto error_cleanup;
+ }
+
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
/* No memory allocation is allowed while holding the notifier lock.
* The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1245,6 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
*/
+ r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -1252,67 +1268,96 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
if (r) {
r = -EAGAIN;
- goto error_abort;
+ goto error_unlock;
}
- p->fence = dma_fence_get(&job->base.s_fence->finished);
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ list_for_each_entry(e, &p->validated, tv.head) {
+
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < (p->gang_size - 1); ++i) {
+ dma_resv_add_fence(e->tv.bo->base.resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
- seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
+ /* The gang leader is remembered as writer */
+ e->tv.num_shared = 0;
+ }
+
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+ p->fence);
amdgpu_cs_post_dependencies(p);
- if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
!p->ctx->preamble_presented) {
- job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
}
cs->out.handle = seq;
- job->uf_sequence = seq;
-
- amdgpu_job_free_resources(job);
+ leader->uf_sequence = seq;
- trace_amdgpu_cs_ioctl(job);
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- drm_sched_entity_push_job(&job->base);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
-
- /* Make sure all BOs are remembered as writers */
- amdgpu_bo_list_for_each_entry(e, p->bo_list)
- e->tv.num_shared = 0;
-
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+
mutex_unlock(&p->adev->notifier_lock);
mutex_unlock(&p->bo_list->bo_list_mutex);
-
return 0;
-error_abort:
- drm_sched_job_cleanup(&job->base);
+error_unlock:
mutex_unlock(&p->adev->notifier_lock);
-error_unlock:
- amdgpu_job_free(job);
+error_cleanup:
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_cleanup(&p->jobs[i]->base);
return r;
}
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
- int i;
+ unsigned i;
- if (!trace_amdgpu_cs_enabled())
- return;
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
+
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
+ if (parser->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
- for (i = 0; i < parser->job->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ amdgpu_bo_unref(&uf);
+ }
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser = {};
- bool reserved_buffers = false;
+ struct amdgpu_cs_parser parser;
int r;
if (amdgpu_ras_intr_triggered())
@@ -1321,25 +1366,20 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
- parser.adev = adev;
- parser.filp = filp;
-
- r = amdgpu_cs_parser_init(&parser, data);
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
if (printk_ratelimit())
DRM_ERROR("Failed to initialize parser %d!\n", r);
- goto out;
+ return r;
}
- r = amdgpu_cs_ib_fill(adev, &parser);
+ r = amdgpu_cs_pass1(&parser, data);
if (r)
- goto out;
+ goto error_fini;
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
+ r = amdgpu_cs_pass2(&parser);
+ if (r)
+ goto error_fini;
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
@@ -1347,22 +1387,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS && r != -EAGAIN)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
- goto out;
+ goto error_fini;
}
- reserved_buffers = true;
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_vm_handling(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto error_backoff;
trace_amdgpu_cs_ibs(&parser);
- r = amdgpu_cs_vm_handling(&parser);
+ r = amdgpu_cs_submit(&parser, data);
if (r)
- goto out;
+ goto error_backoff;
- r = amdgpu_cs_submit(&parser, cs);
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
-out:
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+error_backoff:
+ ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 30ecc4917f81..cbaa19b2b8a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -27,6 +27,8 @@
#include "amdgpu_bo_list.h"
#include "amdgpu_ring.h"
+#define AMDGPU_CS_GANG_SIZE 4
+
struct amdgpu_bo_va_mapping;
struct amdgpu_cs_chunk {
@@ -50,9 +52,11 @@ struct amdgpu_cs_parser {
unsigned nchunks;
struct amdgpu_cs_chunk *chunks;
- /* scheduler job object */
- struct amdgpu_job *job;
- struct drm_sched_entity *entity;
+ /* scheduler job objects */
+ unsigned int gang_size;
+ struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *gang_leader;
/* buffer objects */
struct ww_acquire_ctx ticket;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3ea48385fab3..f6d9d5da53cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -315,7 +315,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
kref_init(&ctx->refcount);
ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -407,7 +406,6 @@ static void amdgpu_ctx_fini(struct kref *ref)
drm_dev_exit(idx);
}
- mutex_destroy(&ctx->lock);
kfree(ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index cc7c8afff414..0fa0e56daf67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -53,7 +53,6 @@ struct amdgpu_ctx {
bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
- struct mutex lock;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62b26f0e37b0..ab8f970b2849 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2365,8 +2365,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
adev->ip_blocks[i].status.sw = true;
- /* need to do gmc hw init early so we can allocate gpu mem */
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ if (r) {
+ DRM_ERROR("hw_init %d failed %d\n", i, r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
/* Try to reserve bad pages early */
if (amdgpu_sriov_vf(adev))
amdgpu_virt_exchange_data(adev);
@@ -2451,19 +2459,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
*/
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (amdgpu_xgmi_add_device(adev) == 0) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ if (!amdgpu_sriov_vf(adev)) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
- if (!hive->reset_domain ||
- !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
- r = -ENOENT;
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
amdgpu_put_xgmi_hive(hive);
- goto init_failed;
}
-
- /* Drop the early temporary reset domain we created for device */
- amdgpu_reset_put_reset_domain(adev->reset_domain);
- adev->reset_domain = hive->reset_domain;
- amdgpu_put_xgmi_hive(hive);
}
}
@@ -3052,8 +3062,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
int i, r;
static enum amd_ip_block_type ip_order[] = {
- AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH,
};
@@ -3144,7 +3154,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -3501,6 +3512,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
+ RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
@@ -3982,6 +3994,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
+ dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
amdgpu_reset_fini(adev);
@@ -4057,12 +4070,20 @@ static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
adev->in_suspend = true;
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
+
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");
@@ -4086,6 +4107,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_device_ip_suspend_phase2(adev);
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
return 0;
}
@@ -4104,6 +4128,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -4118,6 +4148,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
}
r = amdgpu_device_ip_resume(adev);
+
+ /* no matter what r is, always need to properly release full GPU */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+ }
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
return r;
@@ -4739,6 +4776,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset, skip_hw_reset, vram_lost = false;
int r = 0;
+ bool gpu_reset_for_dev_remove = 0;
/* Try reset handler method first */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
@@ -4758,6 +4796,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+ gpu_reset_for_dev_remove =
+ test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
/*
* ASIC reset has to be done on all XGMI hive nodes ASAP
* to allow proper links negotiation in FW (within 1 sec)
@@ -4802,6 +4844,18 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
amdgpu_ras_intr_cleared();
}
+ /* Since the mode1 reset affects base ip blocks, the
+ * phase1 ip blocks need to be resumed. Otherwise there
+ * will be a BIOS signature error and the psp bootloader
+ * can't load kdb on the next amdgpu install.
+ */
+ if (gpu_reset_for_dev_remove) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_device_ip_resume_phase1(tmp_adev);
+
+ goto end;
+ }
+
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
if (need_full_reset) {
/* post card */
@@ -5124,6 +5178,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool need_emergency_restart = false;
bool audio_suspended = false;
int tmp_vram_lost_counter;
+ bool gpu_reset_for_dev_remove = false;
+
+ gpu_reset_for_dev_remove =
+ test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
/*
* Special case: RAS triggered and full reset isn't supported
@@ -5159,8 +5218,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
INIT_LIST_HEAD(&device_list);
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (gpu_reset_for_dev_remove && adev->shutdown)
+ tmp_adev->shutdown = true;
+ }
if (!list_is_first(&adev->reset_list, &device_list))
list_rotate_to_front(&adev->reset_list, &device_list);
device_list_handle = &device_list;
@@ -5243,6 +5305,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ if (gpu_reset_for_dev_remove) {
+ /* Workaroud for ASICs need to disable SMC first */
+ amdgpu_device_smu_fini_early(tmp_adev);
+ }
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
@@ -5276,6 +5342,9 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
adev->asic_reset_res = 0;
goto retry;
}
+
+ if (!r && gpu_reset_for_dev_remove)
+ goto recover_end;
}
skip_hw_reset:
@@ -5349,6 +5418,7 @@ skip_sched_resume:
amdgpu_device_unset_mp1_state(tmp_adev);
}
+recover_end:
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
@@ -5531,9 +5601,9 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
- bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
- !(pci_p2pdma_distance_many(adev->pdev,
- &peer_adev->dev, 1, true) < 0);
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
@@ -5917,3 +5987,36 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
(void)RREG32(data);
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang.
+ * Returns: NULL if we switched to the new gang or a reference to the current
+ * gang leader.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang)
+{
+ struct dma_fence *old = NULL;
+
+ do {
+ dma_fence_put(old);
+ rcu_read_lock();
+ old = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+
+ if (old == gang)
+ break;
+
+ if (!dma_fence_is_signaled(old))
+ return old;
+
+ } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+ old, gang) != old);
+
+ dma_fence_put(old);
+ return NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 9fa2a5ceb77d..3993e6134914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -229,7 +229,7 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, ui
return r;
}
- memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size);
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
release_firmware(fw);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c20922a5af9f..1a06b8d724f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1101,6 +1101,7 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
goto err;
ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
if (ret)
goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 782cbca37538..7bd8e33b14be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -58,7 +58,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 728a0933ea6f..3c9fecdd6b2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -38,6 +38,8 @@
#include <linux/mmu_notifier.h>
#include <linux/suspend.h>
#include <linux/cc_platform.h>
+#include <linux/fb.h>
+#include <linux/dynamic_debug.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@@ -102,9 +104,10 @@
* - 3.46.0 - To enable hot plug amdgpu tests in libdrm
* - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
* - 3.48.0 - Add IP discovery version info to HW INFO
+ * 3.49.0 - Add gang submit into CS IOCTL
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 48
+#define KMS_DRIVER_MINOR 49
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@@ -185,6 +188,18 @@ int amdgpu_vcnfw_log;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
+ "DRM_UT_CORE",
+ "DRM_UT_DRIVER",
+ "DRM_UT_KMS",
+ "DRM_UT_PRIME",
+ "DRM_UT_ATOMIC",
+ "DRM_UT_VBL",
+ "DRM_UT_STATE",
+ "DRM_UT_LEASE",
+ "DRM_UT_DP",
+ "DRM_UT_DRMRES");
+
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
.delayed_reset_work = __DELAYED_WORK_INITIALIZER(
@@ -2186,6 +2201,37 @@ amdgpu_pci_remove(struct pci_dev *pdev)
pm_runtime_forbid(dev->dev);
}
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) {
+ bool need_to_reset_gpu = false;
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ struct amdgpu_hive_info *hive;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive->device_remove_count == 0)
+ need_to_reset_gpu = true;
+ hive->device_remove_count++;
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ need_to_reset_gpu = true;
+ }
+
+ /* Workaround for ASICs need to reset SMU.
+ * Called only when the first device is removed.
+ */
+ if (need_to_reset_gpu) {
+ struct amdgpu_reset_context reset_context;
+
+ adev->shutdown = true;
+ memset(&reset_context, 0, sizeof(reset_context));
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+ }
+
amdgpu_driver_unload_kms(dev);
drm_dev_unplug(dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 8adeb7469f1e..d0d99ed607dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -400,7 +400,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
/* We are not protected by ring lock when reading the last sequence
* but it's ok to report slightly wrong fence count here.
*/
- amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ceb91469958a..9546adc8a76f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -23,6 +23,7 @@
*
*/
+#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
@@ -865,3 +866,142 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
}
return amdgpu_num_kcq;
}
+
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ break;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8abdf41d0f83..832b3807f1d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -304,6 +304,10 @@ struct amdgpu_gfx {
uint32_t rlc_srlg_feature_version;
uint32_t rlc_srls_fw_version;
uint32_t rlc_srls_feature_version;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
@@ -422,4 +426,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index aebc384531ac..34233a74248c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -572,45 +572,15 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 0, 1):
- case IP_VERSION(9, 3, 0):
- case IP_VERSION(9, 4, 0):
- case IP_VERSION(9, 4, 1):
- case IP_VERSION(9, 4, 2):
- case IP_VERSION(10, 3, 3):
- case IP_VERSION(10, 3, 4):
- case IP_VERSION(10, 3, 5):
- case IP_VERSION(10, 3, 6):
- case IP_VERSION(10, 3, 7):
- /*
- * noretry = 0 will cause kfd page fault tests fail
- * for some ASICs, so set default to 1 for these ASICs.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 1;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- default:
- /* Raven currently has issues with noretry
- * regardless of what we decide for other
- * asics, we should leave raven with
- * noretry = 0 until we root cause the
- * issues.
- *
- * default this to 0 for now, but we may want
- * to change this in the future for certain
- * GPUs as it can increase performance in
- * certain cases.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 0;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- }
+ uint32_t gc_ver = adev->ip_versions[GC_HWIP][0];
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 3, 0) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
+
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1062b7ed74ec..46c99331d7f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -105,7 +105,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
*/
(*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
- (*job)->num_ibs = num_ibs;
amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync);
@@ -125,6 +124,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
if (r)
return r;
+ (*job)->num_ibs = 1;
r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
if (r)
kfree(*job);
@@ -132,6 +132,23 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
return r;
}
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+ if (gds) {
+ job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+ }
+ if (gws) {
+ job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+ }
+ if (oa) {
+ job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ }
+}
+
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
@@ -156,11 +173,29 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
dma_fence_put(&job->hw_fence);
}
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader)
+{
+ struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+ WARN_ON(job->gang_submit);
+
+ /*
+ * Don't add a reference when we are the gang leader to avoid circle
+ * dependency.
+ */
+ if (job != leader)
+ dma_fence_get(fence);
+ job->gang_submit = fence;
+}
+
void amdgpu_job_free(struct amdgpu_job *job)
{
amdgpu_job_free_resources(job);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
+ if (job->gang_submit != &job->base.s_fence->scheduled)
+ dma_fence_put(job->gang_submit);
if (!job->hw_fence.ops)
kfree(job);
@@ -230,12 +265,16 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
fence = amdgpu_sync_get_fence(&job->sync);
}
+ if (!fence && job->gang_submit)
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+
return fence;
}
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
int r = 0;
@@ -247,8 +286,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
trace_amdgpu_sched_run_job(job);
- if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
- dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+ /* Skip job if VRAM is lost and never resubmit gangs */
+ if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) ||
+ (job->job_run_counter && job->gang_submit))
+ dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index babc0af751c2..ab7b150e5d50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -50,6 +50,7 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct dma_fence hw_fence;
+ struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
bool vm_needs_flush;
@@ -72,11 +73,20 @@ struct amdgpu_job {
struct amdgpu_ib ibs[];
};
+static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
+{
+ return to_amdgpu_ring(job->base.entity->rq->sched);
+}
+
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader);
void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 77668c3dae5b..fe23e09eec98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -247,6 +247,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_srls_fw_version;
fw_info->feature = adev->gfx.rlc_srls_feature_version;
break;
+ case AMDGPU_INFO_FW_GFX_RLCP:
+ fw_info->ver = adev->gfx.rlcp_ucode_version;
+ fw_info->feature = adev->gfx.rlcp_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCV:
+ fw_info->ver = adev->gfx.rlcv_ucode_version;
+ fw_info->feature = adev->gfx.rlcv_ucode_feature_version;
+ break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@@ -328,6 +336,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->psp.cap_fw_version;
fw_info->feature = adev->psp.cap_feature_version;
break;
+ case AMDGPU_INFO_FW_MES_KIQ:
+ fw_info->ver = adev->mes.ucode_fw_version[0];
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_MES:
+ fw_info->ver = adev->mes.ucode_fw_version[1];
+ fw_info->feature = 0;
+ break;
default:
return -EINVAL;
}
@@ -1469,6 +1485,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* RLCP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCV */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@@ -1581,6 +1613,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
fw_info.feature, fw_info.ver);
}
+ /* MES_KIQ */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MES */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7b46f6bf4187..ad980f4b66e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -222,6 +222,8 @@ struct mes_add_queue_input {
uint64_t tba_addr;
uint64_t tma_addr;
uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
};
struct mes_remove_queue_input {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e6a9b9fc9e0b..2e8f6cd7a729 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -688,13 +688,16 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
* num of amdgpu_vm_pt entries.
*/
BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
- bp->destroy = &amdgpu_bo_vm_destroy;
r = amdgpu_bo_create(adev, bp, &bo_ptr);
if (r)
return r;
*vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
+ /* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list
+ * is initialized.
+ */
+ bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy;
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index cfcaf890a6a1..effa7df3ddbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -511,6 +511,11 @@ static int psp_sw_fini(void *handle)
kfree(cmd);
cmd = NULL;
+ if (psp->km_ring.ring_mem)
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &psp->km_ring.ring_mem_mc_addr,
+ (void **)&psp->km_ring.ring_mem);
+
amdgpu_bo_free_kernel(&psp->fw_pri_bo,
&psp->fw_pri_mc_addr, &psp->fw_pri_buf);
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
@@ -766,7 +771,7 @@ static int psp_tmr_init(struct psp_context *psp)
}
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev),
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
@@ -2055,6 +2060,15 @@ static int psp_hw_start(struct psp_context *psp)
}
}
+ if ((is_psp_fw_valid(psp->ras_drv)) &&
+ (psp->funcs->bootloader_load_ras_drv != NULL)) {
+ ret = psp_bootloader_load_ras_drv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
if ((is_psp_fw_valid(psp->sos)) &&
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
@@ -3040,6 +3054,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->dbg_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index c32b74bd970f..58ce3ebb446c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -36,6 +36,7 @@
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
enum psp_shared_mem_size {
@@ -71,6 +72,7 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SOCDRV = 0xB0000,
PSP_BL__LOAD_DBGDRV = 0xC0000,
PSP_BL__LOAD_INTFDRV = 0xD0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
@@ -114,6 +116,7 @@ struct psp_funcs
int (*bootloader_load_soc_drv)(struct psp_context *psp);
int (*bootloader_load_intf_drv)(struct psp_context *psp);
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
+ int (*bootloader_load_ras_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp,
@@ -323,6 +326,7 @@ struct psp_context
struct psp_bin_desc soc_drv;
struct psp_bin_desc intf_drv;
struct psp_bin_desc dbg_drv;
+ struct psp_bin_desc ras_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -403,6 +407,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0)
#define psp_bootloader_load_dbg_drv(psp) \
((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0)
+#define psp_bootloader_load_ras_drv(psp) \
+ ((psp)->funcs->bootloader_load_ras_drv ? \
+ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ab9ba5a9c33d..2dad7aa9a03b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1811,7 +1811,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
amdgpu_ras_query_error_status(adev, &info);
if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) {
if (amdgpu_ras_reset_error_status(adev, info.head.block))
dev_warn(adev->dev, "Failed to reset error counter and error status");
}
@@ -2719,7 +2720,8 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
/* Need disable ras on all IPs here before ip [hw/sw]fini */
- amdgpu_ras_disable_all_features(adev, 0);
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 0);
amdgpu_ras_recovery_fini(adev);
return 0;
}
@@ -2832,11 +2834,8 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
struct mce *m = (struct mce *)data;
struct amdgpu_device *adev = NULL;
uint32_t gpu_id = 0;
- uint32_t umc_inst = 0;
- uint32_t ch_inst, channel_index = 0;
+ uint32_t umc_inst = 0, ch_inst = 0;
struct ras_err_data err_data = {0, 0, 0, NULL};
- struct eeprom_table_record err_rec;
- uint64_t retired_page;
/*
* If the error was generated in UMC_V2, which belongs to GPU UMCs,
@@ -2875,21 +2874,22 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst);
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error record in mca notifier!\n");
+ return NOTIFY_DONE;
+ }
+
/*
* Translate UMC channel address to Physical address
*/
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num
- + ch_inst];
-
- retired_page = ADDR_OF_8KB_BLOCK(m->addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(m->addr);
-
- memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
- err_data.err_addr = &err_rec;
- amdgpu_umc_fill_error_record(&err_data, m->addr,
- retired_page, channel_index, umc_inst);
+ if (adev->umc.ras &&
+ adev->umc.ras->convert_ras_error_address)
+ adev->umc.ras->convert_ras_error_address(adev,
+ &err_data, m->addr, ch_inst, umc_inst);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -2897,6 +2897,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
amdgpu_ras_save_bad_pages(adev);
}
+ kfree(err_data.err_addr);
return NOTIFY_OK;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index c4283987bb1e..84c241b9a2a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -38,6 +38,7 @@
#define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0
#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0
#define EEPROM_I2C_MADDR_ALDEBARAN 0x0
+#define EEPROM_I2C_MADDR_SMU_13_0_0 (0x54UL << 16)
/*
* The 2 macros bellow represent the actual size in bytes that
@@ -156,6 +157,15 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return false;
}
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0;
+ break;
+
+ default:
+ break;
+ }
+
return true;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f71b83c42590..f5318fedf2f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -31,6 +31,7 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_SKIP_MODE2_RESET = 2,
+ AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
};
struct amdgpu_reset_context {
@@ -112,7 +113,8 @@ static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *dom
static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
{
- kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
}
static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 6373bfb47d55..012b72d00e04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -272,3 +272,275 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *common_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+ unsigned int *tmp;
+ unsigned int i;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+ return -ENOMEM;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ if (info->fw) {
+ common_hdr = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version);
+ adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version);
+ adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
+ adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
+
+ adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version);
+ adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version);
+ adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
+ adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor)
+{
+ int err;
+
+ if (version_major < 2) {
+ /* only support rlc_hdr v2.x and onwards */
+ dev_err(adev->dev, "unsupported rlc fw hdr\n");
+ return -EINVAL;
+ }
+
+ /* is_rlc_v2_1 is still used in APU code path */
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ if (version_minor >= 0) {
+ err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+ if (err) {
+ dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+ return err;
+ }
+ }
+ if (version_minor >= 1)
+ amdgpu_gfx_rlc_init_microcode_v2_1(adev);
+ if (version_minor >= 2)
+ amdgpu_gfx_rlc_init_microcode_v2_2(adev);
+ if (version_minor == 3)
+ amdgpu_gfx_rlc_init_microcode_v2_3(adev);
+ if (version_minor == 4)
+ amdgpu_gfx_rlc_init_microcode_v2_4(adev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 03ac36b2c2cf..23f060db9255 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -267,5 +267,7 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
-
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 42c1f050542f..ea5278f094c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -21,6 +21,7 @@
*
*/
+#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
@@ -150,3 +151,158 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ int err = 0;
+ uint16_t version_major;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const struct sdma_firmware_header_v2_0 *hdr_v2;
+
+ err = amdgpu_ucode_validate(sdma_inst->fw);
+ if (err)
+ return err;
+
+ header = (const struct common_firmware_header *)
+ sdma_inst->fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ switch (version_major) {
+ case 1:
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ break;
+ case 2:
+ hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ if (duplicate)
+ break;
+ }
+
+ memset((void *)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ char *fw_name, u32 instance,
+ bool duplicate)
+{
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ int err = 0, i;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ uint16_t version_major;
+
+ err = request_firmware(&adev->sdma.instance[instance].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ header = (const struct common_firmware_header *)
+ adev->sdma.instance[instance].fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ if ((duplicate && instance) || (!duplicate && version_major > 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]);
+ if (err)
+ goto out;
+
+ if (duplicate) {
+ for (i = 1; i < adev->sdma.num_instances; i++)
+ memcpy((void *)&adev->sdma.instance[i],
+ (void *)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (version_major) {
+ case 1:
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!duplicate && (instance != i))
+ continue;
+ else {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+ break;
+ case 2:
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+
+out:
+ if (err) {
+ DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name);
+ amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
+ }
+ return err;
+}
+
+void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *sdma;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue) {
+ sdma = &adev->sdma.instance[i].page;
+ if (adev->mman.buffer_funcs_ring == sdma) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ break;
+ }
+ }
+ sdma = &adev->sdma.instance[i].ring;
+ if (adev->mman.buffer_funcs_ring == sdma) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ break;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 53ac3ebae8d6..7d99205c2e01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,4 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ char *fw_name, u32 instance, bool duplicate);
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate);
+void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 504af1b93bfa..090e66a1b284 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
@@ -315,6 +316,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
struct hlist_node *tmp;
struct dma_fence *f;
int i;
+
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
@@ -392,7 +394,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
- unsigned i;
+ unsigned int i;
hash_for_each_safe(sync->fences, i, tmp, e, node) {
hash_del(&e->node);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 06dfcf297a8d..5e6ddc7e101c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -140,8 +140,10 @@ TRACE_EVENT(amdgpu_bo_create,
);
TRACE_EVENT(amdgpu_cs,
- TP_PROTO(struct amdgpu_cs_parser *p, int i),
- TP_ARGS(p, i),
+ TP_PROTO(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib),
+ TP_ARGS(p, job, ib),
TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, bo_list)
__field(u32, ring)
@@ -151,10 +153,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
- __entry->dw = p->job->ibs[i].length_dw;
+ __entry->ring = to_amdgpu_ring(job->base.sched)->idx;
+ __entry->dw = ib->length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- to_amdgpu_ring(p->entity->rq->sched));
+ to_amdgpu_ring(job->base.sched));
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b1c455329023..dc262d2c2925 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -424,8 +424,9 @@ error:
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
struct ttm_resource *mem)
{
- uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+ u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
struct amdgpu_res_cursor cursor;
+ u64 end;
if (mem->mem_type == TTM_PL_SYSTEM ||
mem->mem_type == TTM_PL_TT)
@@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
return false;
amdgpu_res_first(mem, 0, mem_size, &cursor);
+ end = cursor.start + cursor.size;
+ while (cursor.remaining) {
+ amdgpu_res_next(&cursor, cursor.size);
- /* ttm_resource_ioremap only supports contiguous memory */
- if (cursor.size != mem_size)
- return false;
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (end != cursor.start)
+ return false;
+
+ end = cursor.start + cursor.size;
+ }
- return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
+ return end <= adev->gmc.visible_vram_size;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 939c8614f0e3..dd0bc649a57d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -164,70 +164,138 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
} else if (version_major == 2) {
const struct rlc_firmware_header_v2_0 *rlc_hdr =
container_of(hdr, struct rlc_firmware_header_v2_0, header);
+ const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 =
+ container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1);
+ const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
+ const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
+ container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
- DRM_DEBUG("ucode_feature_version: %u\n",
- le32_to_cpu(rlc_hdr->ucode_feature_version));
- DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
- DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
- DRM_DEBUG("save_and_restore_offset: %u\n",
- le32_to_cpu(rlc_hdr->save_and_restore_offset));
- DRM_DEBUG("clear_state_descriptor_offset: %u\n",
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
- DRM_DEBUG("avail_scratch_ram_locations: %u\n",
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
- DRM_DEBUG("reg_restore_list_size: %u\n",
- le32_to_cpu(rlc_hdr->reg_restore_list_size));
- DRM_DEBUG("reg_list_format_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_start));
- DRM_DEBUG("reg_list_format_separate_start: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
- DRM_DEBUG("starting_offsets_start: %u\n",
- le32_to_cpu(rlc_hdr->starting_offsets_start));
- DRM_DEBUG("reg_list_format_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
- DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- DRM_DEBUG("reg_list_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_size_bytes));
- DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
- DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
- DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
- DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
- if (version_minor == 1) {
- const struct rlc_firmware_header_v2_1 *v2_1 =
- container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ switch (version_minor) {
+ case 0:
+ /* rlc_hdr v2_0 */
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("reg_restore_list_size: %u\n",
+ le32_to_cpu(rlc_hdr->reg_restore_list_size));
+ DRM_DEBUG("reg_list_format_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_start));
+ DRM_DEBUG("reg_list_format_separate_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
+ DRM_DEBUG("starting_offsets_start: %u\n",
+ le32_to_cpu(rlc_hdr->starting_offsets_start));
+ DRM_DEBUG("reg_list_format_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
+ DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ DRM_DEBUG("reg_list_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes));
+ DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
+ DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
+ DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ break;
+ case 1:
+ /* rlc_hdr v2_1 */
DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
- le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+ le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length));
DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver));
DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver));
DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes));
DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes));
DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver));
DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver));
DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes));
DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes));
DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver));
DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver));
DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes));
DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
- le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes));
+ break;
+ case 2:
+ /* rlc_hdr v2_2 */
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes));
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes));
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes));
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes));
+ break;
+ case 3:
+ /* rlc_hdr v2_3 */
+ DRM_DEBUG("rlcp_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version));
+ DRM_DEBUG("rlcp_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version));
+ DRM_DEBUG("rlcp_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes));
+ DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes));
+ DRM_DEBUG("rlcv_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version));
+ DRM_DEBUG("rlcv_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version));
+ DRM_DEBUG("rlcv_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes));
+ DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes));
+ break;
+ case 4:
+ /* rlc_hdr v2_4 */
+ DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
+ break;
+ default:
+ DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
+ break;
}
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 96b6cf4c4d54..1c36235b4539 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -124,6 +124,7 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_SOC_DRV,
PSP_FW_TYPE_PSP_INTF_DRV,
PSP_FW_TYPE_PSP_DBG_DRV,
+ PSP_FW_TYPE_PSP_RAS_DRV,
};
/* version_major=2, version_minor=0 */
@@ -260,8 +261,12 @@ struct rlc_firmware_header_v2_2 {
/* version_major=2, version_minor=3 */
struct rlc_firmware_header_v2_3 {
struct rlc_firmware_header_v2_2 v2_2;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
uint32_t rlcp_ucode_size_bytes;
uint32_t rlcp_ucode_offset_bytes;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
uint32_t rlcv_ucode_size_bytes;
uint32_t rlcv_ucode_offset_bytes;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 3629d8f292ef..e46439274f3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -51,6 +51,9 @@ struct amdgpu_umc_ras {
struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ void (*convert_ras_error_address)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst);
void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index f36e4f08db6d..0b52af415b28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -191,7 +191,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
fw_name = FIRMWARE_VCN4_0_2;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = false;
+ adev->vcn.indirect_sram = true;
break;
case IP_VERSION(4, 0, 4):
fw_name = FIRMWARE_VCN4_0_4;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 80b7a6cfd026..253ea6b159df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -161,7 +161,8 @@
#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
-#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 12)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
+#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
@@ -171,6 +172,9 @@
#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -335,7 +339,9 @@ struct amdgpu_vcn4_fw_shared {
struct amdgpu_fw_shared_unified_queue_struct sq;
uint8_t pad1[8];
struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
};
struct amdgpu_vcn_fwlog {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 59cac347baa3..83b0c5d86e48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -183,10 +183,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -198,7 +200,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -211,7 +215,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -225,9 +231,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -240,10 +246,13 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
- if (vm_bo->bo->parent)
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- else
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
amdgpu_vm_bo_idle(vm_bo);
+ }
}
/**
@@ -256,9 +265,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -363,12 +372,20 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
- struct amdgpu_vm_bo_base *bo_base, *tmp;
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *shadow;
+ struct amdgpu_bo *bo;
int r;
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
- struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo);
if (r)
@@ -385,7 +402,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
@@ -406,13 +425,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
+ bool empty;
bool ret;
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
- return ret && list_empty(&vm->evicted);
+ spin_lock(&vm->status_lock);
+ empty = list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
+
+ return ret && empty;
}
/**
@@ -680,9 +704,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm_update_params params;
struct amdgpu_vm_bo_base *entry;
bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
int r, idx;
- if (list_empty(&vm->relocated))
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
return 0;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -697,7 +726,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (r)
goto error;
- list_for_each_entry(entry, &vm->relocated, vm_status) {
+ list_for_each_entry(entry, &relocated, vm_status) {
/* vm_flush_needed after updating moved PDEs */
flush_tlb_needed |= entry->moved;
@@ -713,9 +742,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (flush_tlb_needed)
atomic64_inc(&vm->tlb_seq);
- while (!list_empty(&vm->relocated)) {
- entry = list_first_entry(&vm->relocated,
- struct amdgpu_vm_bo_base,
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
vm_status);
amdgpu_vm_bo_idle(entry);
}
@@ -912,6 +940,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
{
struct amdgpu_bo_va *bo_va, *tmp;
+ spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -936,7 +965,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
gtt_mem, cpu_mem);
}
- spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -949,7 +977,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
gtt_mem, cpu_mem);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
}
/**
* amdgpu_vm_bo_update - update all BO mappings in the vm page table
@@ -1278,24 +1306,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_bo_va *bo_va, *tmp;
+ struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
bool clear;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
/* Per VM BOs never need to bo cleared in the page tables */
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->invalidated_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
if (!amdgpu_vm_debug && dma_resv_trylock(resv))
@@ -1310,9 +1343,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
if (!clear)
dma_resv_unlock(resv);
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
@@ -1387,7 +1420,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
!bo_va->base.moved) {
- list_move(&bo_va->base.vm_status, &vm->moved);
+ amdgpu_vm_bo_moved(&bo_va->base);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@@ -1763,9 +1796,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
}
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2019,9 +2052,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->invalidated);
- spin_lock_init(&vm->invalidated_lock);
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
+ INIT_LIST_HEAD(&vm->pt_freed);
+ INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
/* create scheduler entities for page table updates */
r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -2223,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+ flush_work(&vm->pt_free_work);
+
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
amdgpu_vm_set_pasid(adev, vm, 0);
@@ -2484,8 +2521,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
/* Intentionally setting invalid PTE flag
* combination to force a no-retry-fault
*/
- flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
- AMDGPU_PTE_TF;
+ flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
value = 0;
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
@@ -2548,6 +2584,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -2585,7 +2622,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->invalidated_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -2600,7 +2636,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9ecb7f663e19..83acb7bd80fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -254,6 +254,9 @@ struct amdgpu_vm {
bool evicting;
unsigned int saved_flags;
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
/* BOs who needs a validation */
struct list_head evicted;
@@ -268,7 +271,6 @@ struct amdgpu_vm {
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- spinlock_t invalidated_lock;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@@ -276,6 +278,10 @@ struct amdgpu_vm {
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
+ /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
+ struct list_head pt_freed;
+ struct work_struct pt_free_work;
+
/* contains the page directory */
struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
@@ -471,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 88de9f0d4728..358b91243e37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -637,10 +637,34 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
}
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
entry->bo->vm_bo = NULL;
+
+ spin_lock(&entry->vm->status_lock);
list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
amdgpu_bo_unref(&entry->bo);
}
+void amdgpu_vm_pt_free_work(struct work_struct *work)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ struct amdgpu_vm *vm;
+ LIST_HEAD(pt_freed);
+
+ vm = container_of(work, struct amdgpu_vm, pt_free_work);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->pt_freed, &pt_freed);
+ spin_unlock(&vm->status_lock);
+
+ /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
+ amdgpu_bo_reserve(vm->root.bo, true);
+
+ list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
+ amdgpu_vm_pt_free(entry);
+
+ amdgpu_bo_unreserve(vm->root.bo);
+}
+
/**
* amdgpu_vm_pt_free_dfs - free PD/PT levels
*
@@ -652,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
*/
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start)
+ struct amdgpu_vm_pt_cursor *start,
+ bool unlocked)
{
struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
+ if (unlocked) {
+ spin_lock(&vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ list_move(&entry->vm_status, &vm->pt_freed);
+
+ if (start)
+ list_move(&start->entry->vm_status, &vm->pt_freed);
+ spin_unlock(&vm->status_lock);
+ schedule_work(&vm->pt_free_work);
+ return;
+ }
+
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
amdgpu_vm_pt_free(entry);
@@ -673,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- amdgpu_vm_pt_free_dfs(adev, vm, NULL);
+ amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}
/**
@@ -966,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
if (cursor.entry->bo) {
params->table_freed = true;
amdgpu_vm_pt_free_dfs(adev, params->vm,
- &cursor);
+ &cursor,
+ params->unlocked);
}
amdgpu_vm_pt_next(adev, &cursor);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 1fd3cbca20a2..2b0669c464f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -112,7 +112,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
swap(p->vm->last_unlocked, tmp);
dma_fence_put(tmp);
} else {
- amdgpu_bo_fence(p->vm->root.bo, f, true);
+ dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
+ DMA_RESV_USAGE_BOOKKEEP);
}
if (fence && !p->immediate)
@@ -211,12 +212,15 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
int r;
/* Wait for PD/PT moves to be completed */
- dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
- DMA_RESV_USAGE_KERNEL, fence) {
+ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
r = amdgpu_sync_fence(&p->job->sync, fence);
- if (r)
+ if (r) {
+ dma_resv_iter_end(&cursor);
return r;
+ }
}
+ dma_resv_iter_end(&cursor);
do {
ndw = p->num_dw_left;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index d3b483aa81f8..47159e9a0884 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -392,12 +392,20 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
}
/**
+ * Only init hive->reset_domain for none SRIOV configuration. For SRIOV,
+ * Host driver decide how to reset the GPU either through FLR or chain reset.
+ * Guest side will get individual notifications from the host for the FLR
+ * if necessary.
+ */
+ if (!amdgpu_sriov_vf(adev)) {
+ /**
* Avoid recreating reset domain when hive is reconstructed for the case
- * of reset the devices in the XGMI hive during probe for SRIOV
+ * of reset the devices in the XGMI hive during probe for passthrough GPU
* See https://www.spinics.net/lists/amd-gfx/msg58836.html
*/
- if (adev->reset_domain->type != XGMI_HIVE) {
- hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+ if (adev->reset_domain->type != XGMI_HIVE) {
+ hive->reset_domain =
+ amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
if (!hive->reset_domain) {
dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
ret = -ENOMEM;
@@ -406,9 +414,10 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
hive = NULL;
goto pro_end;
}
- } else {
- amdgpu_reset_get_reset_domain(adev->reset_domain);
- hive->reset_domain = adev->reset_domain;
+ } else {
+ amdgpu_reset_get_reset_domain(adev->reset_domain);
+ hive->reset_domain = adev->reset_domain;
+ }
}
hive->hive_id = adev->gmc.xgmi.hive_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 552e6fb55aa8..30dcc1681b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -43,6 +43,7 @@ struct amdgpu_hive_info {
} pstate;
struct amdgpu_reset_domain *reset_domain;
+ uint32_t device_remove_count;
};
struct amdgpu_pcs_ras_field {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 5647f13b98d4..cbca9866645c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -309,14 +309,10 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
*/
static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index e4dde41f2f68..af94ac580d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3943,56 +3943,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
DRM_WARN_ONCE("CP firmware version too old, please update!");
}
-
-static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
-static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_2 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
- adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
- adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
- adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
-}
-
-static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_4 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
- adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
- adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
- adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
- adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
- adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
- adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
- adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
- adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
- adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
-}
-
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -4028,12 +3978,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
char fw_name[40];
char *wks = "";
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
@@ -4091,9 +4036,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
@@ -4102,9 +4045,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_validate(adev->gfx.me_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
@@ -4113,69 +4054,27 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_validate(adev->gfx.ce_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
+ /* don't check this. There are apparently firmwares in the wild with
+ * incorrect size in the header
+ */
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ if (err)
+ dev_dbg(adev->dev,
+ "gfx10: amdgpu_ucode_validate() failed \"%s\"\n",
+ fw_name);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (version_major == 2) {
- if (version_minor >= 1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor >= 2)
- gfx_v10_0_init_rlc_iram_dram_microcode(adev);
- if (version_minor == 4) {
- gfx_v10_0_init_tap_delays_microcode(adev);
- }
- }
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
@@ -4185,9 +4084,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_validate(adev->gfx.mec_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
@@ -4195,164 +4093,18 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
adev->gfx.mec2_fw = NULL;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- if (info->fw) {
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- if (adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
-
- if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
- adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
- }
-
- }
-
- if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
- }
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
-
gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
dev_err(adev->dev,
- "gfx10: Failed to load firmware \"%s\"\n",
+ "gfx10: Failed to init firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index fa718318568e..251109723ab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -438,61 +438,12 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
-static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
-static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_2 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
- adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
- adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
- adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
-}
-
-static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_3 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
- adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
- adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
- adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
-}
-
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[40];
char ucode_prefix[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
- const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
@@ -513,14 +464,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_fw->data, 2, 0);
if (adev->gfx.rs64_enable) {
dev_info(adev->dev, "CP RS64 enable\n");
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
-
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
} else {
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
@@ -531,14 +479,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
if (adev->gfx.rs64_enable) {
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
-
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
} else {
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
}
if (!amdgpu_sriov_vf(adev)) {
@@ -547,58 +492,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ if (err)
+ goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (version_major == 2) {
- if (version_minor >= 1)
- gfx_v11_0_init_rlc_ext_microcode(adev);
- if (version_minor >= 2)
- gfx_v11_0_init_rlc_iram_dram_microcode(adev);
- if (version_minor == 3)
- gfx_v11_0_init_rlcp_rlcv_microcode(adev);
- }
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
@@ -609,190 +510,23 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
if (adev->gfx.rs64_enable) {
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
-
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
} else {
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
}
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- if (adev->gfx.rs64_enable) {
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK];
- info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
- } else {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) -
- le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
- }
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- if (info->fw) {
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- if (adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
- adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
- }
-
- if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
- }
- }
-
out:
if (err) {
dev_err(adev->dev,
- "gfx11: Failed to load firmware \"%s\"\n",
+ "gfx11: Failed to init firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
@@ -5240,6 +4974,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
u32 reg, data;
+ amdgpu_gfx_off_ctrl(adev, false);
+
reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
@@ -5253,6 +4989,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
else
WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
}
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1d6d3a852a0b..0320be4a5fc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1091,27 +1091,6 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
-static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_1 *rlc_hdr;
-
- rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
- adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
- adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
- adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
- adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
- adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
- adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
- adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
- adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
- adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
- adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
- adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
- adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
- adev->gfx.rlc.reg_list_format_direct_reg_list_length =
- le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
-}
-
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.me_fw_write_wait = false;
@@ -1273,9 +1252,6 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
{
char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
@@ -1284,9 +1260,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
@@ -1295,9 +1269,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_validate(adev->gfx.me_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
@@ -1306,37 +1278,12 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_validate(adev->gfx.ce_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
out:
if (err) {
dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
+ "gfx9: Failed to init firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
@@ -1353,11 +1300,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
{
char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
- unsigned int *tmp = NULL;
- unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
uint32_t smu_version;
@@ -1386,92 +1329,17 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
if (err)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ if (err)
+ goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
- le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
- le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
- le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
- le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
- le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
- le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
- le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
- le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
-
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
-
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
-
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v9_0_init_rlc_ext_microcode(adev);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- if (adev->gfx.rlc.is_rlc_v2_1 &&
- adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
- adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
- adev->gfx.rlc.save_restore_list_srm_size_bytes) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
- }
- }
-
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
out:
if (err) {
dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
+ "gfx9: Failed to init firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.rlc_fw);
adev->gfx.rlc_fw = NULL;
@@ -1494,9 +1362,6 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
{
char fw_name[30];
int err;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct gfx_firmware_header_v1_0 *cp_hdr;
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
@@ -1509,10 +1374,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_validate(adev->gfx.mec_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
-
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
@@ -1525,12 +1388,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
if (err)
goto out;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.mec2_fw->data;
- adev->gfx.mec2_fw_version =
- le32_to_cpu(cp_hdr->header.ucode_version);
- adev->gfx.mec2_feature_version =
- le32_to_cpu(cp_hdr->ucode_feature_version);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
err = 0;
adev->gfx.mec2_fw = NULL;
@@ -1540,49 +1399,12 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
-
- /* TODO: Determine if MEC2 JT FW loading can be removed
- for all GFX V9 asic and above */
- if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
- info->fw = adev->gfx.mec2_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
- PAGE_SIZE);
- }
- }
- }
-
out:
gfx_v9_0_check_if_need_gfxoff(adev);
gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
- "gfx9: Failed to load firmware \"%s\"\n",
+ "gfx9: Failed to init firmware \"%s\"\n",
fw_name);
release_firmware(adev->gfx.mec_fw);
adev->gfx.mec_fw = NULL;
@@ -5607,7 +5429,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
BUG_ON(offset > ring->buf_mask);
BUG_ON(ring->ring[offset] != 0x55aa55aa);
- cur = (ring->wptr & ring->buf_mask) - 1;
+ cur = (ring->wptr - 1) & ring->buf_mask;
if (likely(cur > offset))
ring->ring[offset] = cur - offset;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4603653916f5..67ca16a8027c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1103,10 +1103,13 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
*flags |= AMDGPU_PDE_BFS(0x9);
} else if (level == AMDGPU_VM_PDB0) {
- if (*flags & AMDGPU_PDE_PTE)
+ if (*flags & AMDGPU_PDE_PTE) {
*flags &= ~AMDGPU_PDE_PTE;
- else
+ if (!(*flags & AMDGPU_PTE_VALID))
+ *addr |= 1 << PAGE_SHIFT;
+ } else {
*flags |= AMDGPU_PTE_TF;
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
index 536dafb57ee0..fc69c1a29e23 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
@@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_imu.h"
+#include "imu_v11_0_3.h"
#include "gc/gc_11_0_3_offset.h"
#include "gc/gc_11_0_3_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index b64cd46a159a..5cec6b259b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -187,6 +187,19 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
mes_add_queue_pkt.trap_en = 1;
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
+ if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&
+ (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
+ (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3))))
+ mes_add_queue_pkt.trap_en = 1;
+
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index b465baa26762..aa761ff3a5fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
WREG32_PCIE(smnPCIE_LC_CNTL, data);
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v2_3_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v2_3_program_ltr(adev);
def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index f7f6ddebd3e4..37615a77287b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v6_1_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v6_1_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 11848d1e238b..19455a725939 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
};
+#ifdef CONFIG_PCIEASPM
static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
@@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v7_4_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v7_4_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index f30bc826a878..def89379b51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -28,6 +28,14 @@
#include "nbio/nbio_7_7_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
@@ -336,4 +344,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_clockgating_state = nbio_v7_7_get_clockgating_state,
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
+ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 5b5b1ef0c2b1..21d822b1d589 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -224,6 +224,12 @@ static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
}
+static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
+}
+
+
static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
{
int ret;
@@ -720,6 +726,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv,
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
.ring_init = psp_v13_0_ring_init,
.ring_create = psp_v13_0_ring_create,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 6bdffdc1c0b9..c52d246a1d96 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -342,14 +342,10 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 2584fa3cb13e..486d9b5c1b9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -516,14 +516,10 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 0cf9d3b486b2..298fa11702e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -561,44 +561,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
- /* arcturus shares the same FW memory across
- all SDMA isntances */
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
- adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0))
- break;
- }
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
/**
* sdma_v4_0_init_microcode - load ucode images from disk
*
@@ -615,9 +577,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
char fw_name[30];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
+ int ret, i;
DRM_DEBUG("\n");
@@ -656,58 +616,25 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
-
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) {
/* Acturus & Aldebaran will leverage the same FW memory
for every SDMA instance */
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
- }
- else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
-
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
- if (err)
- goto out;
- }
- }
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ ret = amdgpu_sdma_init_microcode(adev, fw_name, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false);
+ if (ret)
+ return ret;
}
}
-out:
- if (err) {
- DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v4_0_destroy_inst_ctx(adev);
- }
- return err;
+ return ret;
}
/**
@@ -988,18 +915,12 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
- int i, unset = 0;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].ring;
+ int i;
- if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = 1;
- }
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
@@ -1030,20 +951,12 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
- bool unset = false;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- sdma[i] = &adev->sdma.instance[i].page;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
- (!unset)) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = true;
- }
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -1504,6 +1417,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
if (!amdgpu_sriov_vf(adev)) {
+ ring = &adev->sdma.instance[i].ring;
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
+ ring->use_doorbell, ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+
/* unhalt engine */
temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
@@ -1995,7 +1913,11 @@ static int sdma_v4_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- sdma_v4_0_destroy_inst_ctx(adev);
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0) ||
+ adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
return 0;
}
@@ -2018,8 +1940,11 @@ static int sdma_v4_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
return 0;
+ }
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a019ac92edb7..d4d9f196db83 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -240,10 +240,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
char fw_name[40];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
- const struct sdma_firmware_header_v1_0 *hdr;
+ int ret, i;
if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5)))
return 0;
@@ -272,38 +269,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
- if (err)
- goto out;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
- if (adev->sdma.instance[i].feature_version >= 20)
- adev->sdma.instance[i].burst_nop = true;
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false);
+ if (ret)
+ return ret;
}
-out:
- if (err) {
- DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
- }
- }
- return err;
+
+ return ret;
}
static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
@@ -613,14 +584,10 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
@@ -1465,12 +1432,10 @@ static int sdma_v5_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- release_firmware(adev->sdma.instance[i].fw);
- adev->sdma.instance[i].fw = NULL;
-
+ for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- }
+
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
return 0;
}
@@ -1491,8 +1456,11 @@ static int sdma_v5_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
return 0;
+ }
sdma_v5_0_ctx_switch_enable(adev, false);
sdma_v5_0_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 95689ef4be10..809eca54fc61 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -89,33 +89,6 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v1_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- release_firmware(adev->sdma.instance[0].fw);
-
- memset((void *)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
/**
* sdma_v5_2_init_microcode - load ucode images from disk
*
@@ -132,9 +105,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
char fw_name[40];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct common_firmware_header *header = NULL;
DRM_DEBUG("\n");
@@ -169,42 +139,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++)
- memcpy((void *)&adev->sdma.instance[i],
- (void *)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
-
- if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 0)))
- return 0;
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
- }
-
-out:
- if (err) {
- DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v5_2_destroy_inst_ctx(adev);
- }
- return err;
+ return amdgpu_sdma_init_microcode(adev, fw_name, 0, true);
}
static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
@@ -479,18 +414,10 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
- struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring;
- struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1) ||
- (adev->mman.buffer_funcs_ring == sdma2) ||
- (adev->mman.buffer_funcs_ring == sdma3))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
@@ -1406,7 +1333,7 @@ static int sdma_v5_2_sw_fini(void *handle)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- sdma_v5_2_destroy_inst_ctx(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
return 0;
}
@@ -1422,8 +1349,11 @@ static int sdma_v5_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
return 0;
+ }
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 7ae572a08cb3..da3beb0bf2fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -78,33 +78,6 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static int sdma_v6_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
-{
- int err = 0;
- const struct sdma_firmware_header_v2_0 *hdr;
-
- err = amdgpu_ucode_validate(sdma_inst->fw);
- if (err)
- return err;
-
- hdr = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
- sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
- sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
-
- if (sdma_inst->feature_version >= 20)
- sdma_inst->burst_nop = true;
-
- return 0;
-}
-
-static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev)
-{
- release_firmware(adev->sdma.instance[0].fw);
-
- memset((void*)adev->sdma.instance, 0,
- sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
-}
-
/**
* sdma_v6_0_init_microcode - load ucode images from disk
*
@@ -114,16 +87,10 @@ static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev)
* the driver (not loaded into hw).
* Returns 0 on success, error on failure.
*/
-
-// emulation only, won't work on real chip
-// sdma 6.0.0 real chip need to use PSP to load firmware
static int sdma_v6_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[30];
char ucode_prefix[30];
- int err = 0, i;
- struct amdgpu_firmware_info *info = NULL;
- const struct sdma_firmware_header_v2_0 *sdma_hdr;
DRM_DEBUG("\n");
@@ -131,43 +98,7 @@ static int sdma_v6_0_init_microcode(struct amdgpu_device *adev)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
- err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
- if (err)
- goto out;
-
- err = sdma_v6_0_init_inst_ctx(&adev->sdma.instance[0]);
- if (err)
- goto out;
-
- for (i = 1; i < adev->sdma.num_instances; i++) {
- memcpy((void*)&adev->sdma.instance[i],
- (void*)&adev->sdma.instance[0],
- sizeof(struct amdgpu_sdma_instance));
- }
-
- DRM_DEBUG("psp_load == '%s'\n",
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- sdma_hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
- info->fw = adev->sdma.instance[0].fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
- info->fw = adev->sdma.instance[0].fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
- }
-
-out:
- if (err) {
- DRM_ERROR("sdma_v6_0: Failed to load firmware \"%s\"\n", fw_name);
- sdma_v6_0_destroy_inst_ctx(adev);
- }
- return err;
+ return amdgpu_sdma_init_microcode(adev, fw_name, 0, true);
}
static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
@@ -467,14 +398,10 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
*/
static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
{
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
u32 rb_cntl, ib_cntl;
int i;
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
- (adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
@@ -484,9 +411,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
}
-
- sdma0->sched.ready = false;
- sdma1->sched.ready = false;
}
/**
@@ -915,7 +839,8 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
m->sdmax_rlcx_rb_cntl =
order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
- 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+ 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
@@ -1370,27 +1295,27 @@ static int sdma_v6_0_sw_fini(void *handle)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
- sdma_v6_0_destroy_inst_ctx(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
return 0;
}
static int sdma_v6_0_hw_init(void *handle)
{
- int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = sdma_v6_0_start(adev);
-
- return r;
+ return sdma_v6_0_start(adev);
}
static int sdma_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
return 0;
+ }
sdma_v6_0_ctx_switch_enable(adev, false);
sdma_v6_0_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index f675111ace20..4d5e718540aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -116,15 +116,14 @@ static void si_dma_stop(struct amdgpu_device *adev)
u32 rb_cntl;
unsigned i;
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
/* dma0 */
rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
rb_cntl &= ~DMA_RB_ENABLE;
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fde6154f2009..183024d7c184 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1211,25 +1211,6 @@ static int soc15_common_sw_fini(void *handle)
return 0;
}
-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
-{
- int i;
- struct amdgpu_ring *ring;
-
- /* sdma/ih doorbell range are programed by hypervisor */
- if (!amdgpu_sriov_vf(adev)) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- adev->nbio.funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
- }
-
- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
- adev->irq.ih.doorbell_index);
- }
-}
-
static int soc15_common_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1249,12 +1230,6 @@ static int soc15_common_hw_init(void *handle)
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
- /* HW doorbell routing policy: doorbell writing not
- * in SDMA/IH/MM/ACV range will be routed to CP. So
- * we need to init SDMA/IH/MM/ACV doorbell range prior
- * to CP ip block init and ring test.
- */
- soc15_doorbell_range_init(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index a26c5723c46e..795706b3b092 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -421,6 +421,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
+ return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
return false;
default:
@@ -628,6 +629,8 @@ static int soc21_common_early_init(void *handle)
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags =
AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 939cb203f7ad..f17d297b594b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -327,10 +327,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
return;
}
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -343,10 +342,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(err_addr);
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1)
- amdgpu_umc_fill_error_record(err_data, err_addr,
+ amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index bf7524f16b66..5d5d031c9e7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -187,20 +187,51 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
}
}
+static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint32_t channel_index;
+ uint64_t soc_pa, retired_page, column;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ /* clear [C4 C3 C2] in soc physical address */
+ soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2] */
+ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+
+ /* shift R14 bit */
+ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
+}
+
static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
uint32_t ch_inst,
uint32_t umc_inst)
{
- uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
- uint32_t channel_index;
+ uint64_t mc_umc_status, err_addr;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
if (mc_umc_status == 0)
@@ -209,42 +240,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- /* translate umc channel address to soc pa, 3 parts are included */
- soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* The umc channel bits are not original values, they are hashed */
- SET_CHANNEL_HASH(channel_index, soc_pa);
-
- /* clear [C4 C3 C2] in soc physical address */
- soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
- dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
-
- /* shift R14 bit */
- retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
- dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
- }
- }
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
}
@@ -452,14 +456,11 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
- uint32_t umc_reg_offset,
- uint32_t ch_inst,
+ uint32_t umc_reg_offset, uint32_t ch_inst,
uint32_t umc_inst)
{
uint32_t mc_umc_status_addr;
- uint32_t channel_index;
- uint64_t mc_umc_status, mc_umc_addrt0;
- uint64_t err_addr, soc_pa, retired_page, column;
+ uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -477,45 +478,15 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
return;
}
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr =
+ REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- /* translate umc channel address to soc pa, 3 parts are included */
- soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* The umc channel bits are not original values, they are hashed */
- SET_CHANNEL_HASH(channel_index, soc_pa);
-
- /* clear [C4 C3 C2] in soc physical address */
- soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
- dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
-
- /* shift R14 bit */
- retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
- dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
- }
- }
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
/* clear umc status */
@@ -540,8 +511,7 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
ch_inst);
umc_v6_7_query_error_address(adev,
err_data,
- umc_reg_offset,
- ch_inst,
+ umc_reg_offset, ch_inst,
umc_inst);
}
}
@@ -583,4 +553,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
+ .convert_ras_error_address = umc_v6_7_convert_error_address,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index 36a2053f2e8b..91235df54e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -101,22 +101,16 @@ static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
{
- uint32_t ecc_err_cnt, ecc_err_cnt_addr;
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
/* UMC 8_10 registers */
- ecc_err_cnt_addr =
- SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
- ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
- *error_count +=
- (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
- UMC_V8_10_CE_CNT_INIT);
-
- /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */
+ /* Rely on MCUMC_STATUS for correctable error counter
+ * MCUMC_STATUS is a 64 bit register
+ */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
@@ -214,7 +208,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
{
uint64_t mc_umc_status_addr;
uint64_t mc_umc_status, err_addr;
- uint32_t channel_index;
+ uint64_t mc_umc_addrt0, na_err_addr_base;
+ uint64_t na_err_addr, retired_page_addr;
+ uint32_t channel_index, addr_lsb, col = 0;
+ int ret = 0;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -235,13 +232,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
umc_inst * adev->umc.channel_inst_num +
ch_inst];
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
- uint32_t addr_lsb;
- uint64_t mc_umc_addrt0;
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
@@ -249,32 +243,24 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
/* the lowest lsb bits should be ignored */
addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
-
err_addr &= ~((0x1ULL << addr_lsb) - 1);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
- uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
- uint64_t na_err_addr, retired_page_addr;
- uint32_t col = 0;
- int ret = 0;
-
- /* loop for all possibilities of [C6 C5] in normal address. */
- for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
- na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
-
- /* Mapping normal error address to retired soc physical address. */
- ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
- na_err_addr, &retired_page_addr);
- if (ret) {
- dev_err(adev->dev, "Failed to map pa from umc na.\n");
- break;
- }
- dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
- retired_page_addr);
- amdgpu_umc_fill_error_record(err_data, na_err_addr,
- retired_page_addr, channel_index, umc_inst);
+ na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+
+ /* loop for all possibilities of [C6 C5] in normal address. */
+ for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
+ na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
+
+ /* Mapping normal error address to retired soc physical address. */
+ ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
+ na_err_addr, &retired_page_addr);
+ if (ret) {
+ dev_err(adev->dev, "Failed to map pa from umc na.\n");
+ break;
}
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
+ retired_page_addr);
+ amdgpu_umc_fill_error_record(err_data, na_err_addr,
+ retired_page_addr, channel_index, umc_inst);
}
}
@@ -344,6 +330,31 @@ static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
}
}
+static uint32_t umc_v8_10_query_ras_poison_mode_per_channel(
+ struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_ctrl_addr, ecc_ctrl;
+
+ ecc_ctrl_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccCtrl);
+ ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
+ umc_reg_offset) * 4);
+
+ return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_GeccCtrl, UCFatalEn);
+}
+
+static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t umc_reg_offset = 0;
+
+ /* Enabling fatal error in umc node0 instance0 channel0 will be
+ * considered as fatal error mode
+ */
+ umc_reg_offset = get_umc_v8_10_reg_offset(adev, 0, 0, 0);
+ return !umc_v8_10_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
+}
+
const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
.query_ras_error_count = umc_v8_10_query_ras_error_count,
.query_ras_error_address = umc_v8_10_query_ras_error_address,
@@ -354,4 +365,5 @@ struct amdgpu_umc_ras umc_v8_10_ras = {
.hw_ops = &umc_v8_10_ras_hw_ops,
},
.err_cnt_init = umc_v8_10_err_cnt_init,
+ .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index f35253e0eaa6..b717fdaa46e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -108,20 +108,35 @@ static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
}
}
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint64_t retired_page;
+ uint32_t channel_index;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+}
+
static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
uint32_t ch_inst,
uint32_t umc_inst)
{
- uint64_t mc_umc_status, err_addr, retired_page;
- uint32_t channel_index;
+ uint64_t mc_umc_status, err_addr;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
if (mc_umc_status == 0)
@@ -130,24 +145,15 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1)
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
}
@@ -324,14 +330,12 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
mc_umc_addrt0 =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
-
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (mc_umc_status == 0)
@@ -343,10 +347,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
return;
}
- /* calculate error address if ue/ce error is detected */
+ /* calculate error address if ue error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
@@ -354,16 +357,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
err_addr &= ~((0x1ULL << lsb) - 1);
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1)
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
}
/* clear umc status */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 39405f0db824..9c8b5fd99037 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1761,21 +1761,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
- if (atomic_read(&p->entity->fence_seq))
+ if (atomic_read(&job->base.entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
- drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
@@ -1846,7 +1848,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
- r = vcn_v3_0_limit_sched(p);
+ r = vcn_v3_0_limit_sched(p, job);
if (r)
goto out;
}
@@ -1860,7 +1862,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
uint32_t msg_lo = 0, msg_hi = 0;
unsigned i;
int r;
@@ -1879,7 +1881,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
msg_hi = val;
} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
val == 0) {
- r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
+ r = vcn_v3_0_dec_msg(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
if (r)
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 09c89faa8c27..897a5ce9c9da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -150,6 +150,10 @@ static int vcn_v4_0_sw_init(void *handle)
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
fw_shared->sq.is_enabled = 1;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
if (amdgpu_sriov_vf(adev))
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
@@ -1591,21 +1595,23 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
- if (atomic_read(&p->entity->fence_seq))
+ if (atomic_read(&job->base.entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
- drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
-static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
@@ -1676,7 +1682,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
- r = vcn_v4_0_limit_sched(p);
+ r = vcn_v4_0_limit_sched(p, job);
if (r)
goto out;
}
@@ -1689,32 +1695,34 @@ out:
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ struct amdgpu_vcn_decode_buffer *decode_buffer;
+ uint64_t addr;
uint32_t val;
- int r = 0;
/* The first instance can decode anything */
if (!ring->me)
- return r;
+ return 0;
/* unified queue ib header has 8 double words. */
if (ib->length_dw < 8)
- return r;
+ return 0;
val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+ if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
+ return 0;
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
- if (decode_buffer->valid_buf_flag & 0x1)
- r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo);
- }
- return r;
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
}
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 03b7066471f9..1e83db0c5438 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
}
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
/* enable interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 2022ffbb8dba..59dfca093155 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
}
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
/* enable interrupts */