diff options
author | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
commit | 901bdf5ea1a836400ee69aa32b04e9c209271ec7 (patch) | |
tree | ccb1851c8a71e776dbccf1ccae132dc9b5f093c6 /drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | |
parent | ba57b9b11f78530146f02b776854b2b6b6d344a4 (diff) | |
parent | 3b718dcaf163d17fe907ea098c8449e0cd6bc271 (diff) |
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02:
amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates
amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions
radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero
ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()
UAPI:
- Add new ctx query flag to better handle GPU resets
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3
amd-drm-next-6.5-2023-06-09:
amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc clode cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- make vbl interface admin only
- Handle PCI accelerator class
amdkfd:
- Add debugger support for gdb
radeon:
- Fix possible UAF
drm:
- Add Colorspace functionality
UAPI:
- Add debugger interface for enabling gdb
Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 86 |
1 files changed, 80 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 18250845a989..29a2d0499b67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -34,6 +34,9 @@ static int pm_map_process_v9(struct packet_manager *pm, { struct pm4_mes_map_process *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; + struct kfd_node *kfd = pm->dqm->dev; + struct kfd_process_device *pdd = + container_of(qpd, struct kfd_process_device, qpd); packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); @@ -49,6 +52,12 @@ static int pm_map_process_v9(struct packet_manager *pm, packet->bitfields14.sdma_enable = 1; packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled && + pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) { + packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid; + packet->bitfields2.new_debug = 1; + } + packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; if (qpd->tba_addr) { @@ -79,6 +88,10 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, { struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; + struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_process_device *pdd = + container_of(qpd, struct kfd_process_device, qpd); + int i; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); @@ -93,6 +106,16 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, packet->bitfields14.num_oac = qpd->num_oac; packet->bitfields14.sdma_enable = 1; packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + packet->spi_gdbg_per_vmid_cntl = pdd->spi_dbg_override | + pdd->spi_dbg_launch_mode; + + if (pdd->process->debug_trap_enabled) { + for (i = 0; i < kfd->device_info.num_of_watch_points; i++) + packet->tcp_watch_cntl[i] = pdd->watch_points[i]; + + packet->bitfields2.single_memops = + !!(pdd->process->dbg_flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP); + } packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; @@ -119,7 +142,7 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, struct pm4_mes_runlist *packet; int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; + struct kfd_node *kfd = pm->dqm->dev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the @@ -220,13 +243,24 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: use_static = false; /* no static queues under SDMA */ - if (q->properties.sdma_engine_id < 2 && !pm_use_ext_eng(q->device)) + if (q->properties.sdma_engine_id < 2 && + !pm_use_ext_eng(q->device->kfd)) packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; else { - packet->bitfields2.extended_engine_sel = - extended_engine_sel__mes_map_queues__sdma0_to_7_sel; - packet->bitfields2.engine_sel = q->properties.sdma_engine_id; + /* + * For GFX9.4.3, SDMA engine id can be greater than 8. + * For such cases, set extended_engine_sel to 2 and + * ensure engine_sel lies between 0-7. + */ + if (q->properties.sdma_engine_id >= 8) + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_map_queues__sdma8_to_15_sel; + else + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_map_queues__sdma0_to_7_sel; + + packet->bitfields2.engine_sel = q->properties.sdma_engine_id % 8; } break; default: @@ -251,6 +285,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } +static int pm_set_grace_period_v9(struct packet_manager *pm, + uint32_t *buffer, + uint32_t grace_period) +{ + struct pm4_mec_write_data_mmio *packet; + uint32_t reg_offset = 0; + uint32_t reg_data = 0; + + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( + pm->dqm->dev->adev, + pm->dqm->wait_times, + grace_period, + ®_offset, + ®_data); + + if (grace_period == USE_DEFAULT_GRACE_PERIOD) + reg_data = pm->dqm->wait_times; + + packet = (struct pm4_mec_write_data_mmio *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); + + packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA, + sizeof(struct pm4_mec_write_data_mmio)); + + packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register; + packet->bitfields2.addr_incr = + addr_incr___write_data__do_not_increment_address; + + packet->bitfields3.dst_mmreg_addr = reg_offset; + + packet->data = reg_data; + + return 0; +} + static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset) @@ -263,7 +332,8 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_mes_unmap_queues)); - packet->bitfields2.extended_engine_sel = pm_use_ext_eng(pm->dqm->dev) ? + packet->bitfields2.extended_engine_sel = + pm_use_ext_eng(pm->dqm->dev->kfd) ? extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel : extended_engine_sel__mes_unmap_queues__legacy_engine_sel; @@ -333,6 +403,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, + .set_grace_period = pm_set_grace_period_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -340,6 +411,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; @@ -350,6 +422,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, + .set_grace_period = pm_set_grace_period_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), @@ -357,6 +430,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; |