aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2023-06-15 14:11:22 +1000
committerDave Airlie <airlied@redhat.com>2023-06-15 14:11:22 +1000
commit901bdf5ea1a836400ee69aa32b04e9c209271ec7 (patch)
treeccb1851c8a71e776dbccf1ccae132dc9b5f093c6 /drivers/gpu/drm/amd/amdkfd/kfd_crat.c
parentba57b9b11f78530146f02b776854b2b6b6d344a4 (diff)
parent3b718dcaf163d17fe907ea098c8449e0cd6bc271 (diff)
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02: amdgpu: - SR-IOV fixes - Warning fixes - Misc code cleanups and spelling fixes - DCN 3.2 updates - Improved DC FAMS support for better power management - Improved DC SubVP support for better power management - DCN 3.1.x fixes - Max IB size query - DC GPU reset fixes - RAS updates - DCN 3.0.x fixes - S/G display fixes - CP shadow buffer support - Implement connector force callback - Z8 power improvements - PSP 13.0.10 vbflash support - Mode2 reset fixes - Store MQDs in VRAM to improve queue switch latency - VCN 3.x fixes - JPEG 3.x fixes - Enable DC_FP on LoongArch - GFXOFF fixes - GC 9.4.3 partition support - SDMA 4.4.2 partition support - VCN/JPEG 4.0.3 partition support - VCN 4.0.3 updates - NBIO 7.9 updates - GC 9.4.3 updates - Take NUMA into account when allocating memory - Handle NUMA for partitions - SMU 13.0.6 updates - GC 9.4.3 RAS updates - Stop including unused swiotlb.h - SMU 13.0.7 fixes - Fix clock output ordering on some APUs - Clean up DC FPGA code - GFX9 preemption fixes - Misc irq fixes - S0ix fixes - Add new DRM_AMDGPU_WERROR config parameter to help with CI - PCIe fix for RDNA2 - kdoc fixes - Documentation updates amdkfd: - Query TTM mem limit rather than hardcoding it - GC 9.4.3 partition support - Handle NUMA for partitions radeon: - Fix possible double free - Stop including unused swiotlb.h - Fix possible division by zero ttm: - Add query for TTM mem limit - Add NUMA awareness to pools - Export ttm_pool_fini() UAPI: - Add new ctx query flag to better handle GPU resets Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290 - Add new interface to query and set shadow buffer for RDNA3 Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986 - Add new INFO query for max IB size Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3 amd-drm-next-6.5-2023-06-09: amdgpu: - S0ix fixes - Initial SMU13 Overdrive support - kdoc fixes - Misc clode cleanups - Flexible array fixes - Display OTG fixes - SMU 13.0.6 updates - Revert some broken clock counter updates - Misc display fixes - GFX9 preemption fixes - Add support for newer EEPROM bad page table format - Add missing radeon secondary id - Add support for new colorspace KMS API - CSA fix - Stable pstate fixes for APUs - make vbl interface admin only - Handle PCI accelerator class amdkfd: - Add debugger support for gdb radeon: - Fix possible UAF drm: - Add Colorspace functionality UAPI: - Add debugger interface for enabling gdb Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi - Add KMS colorspace API Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c88
1 files changed, 58 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 475e47027354..3dcd8f8bc98e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -30,6 +30,9 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
+#define DUMMY_VRAM_SIZE 31138512896
+
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
* The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
props->heap_type = heap_type;
props->flags = flags;
+ if (size_in_bytes == 0)
+ size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
props->size_in_bytes = size_in_bytes;
props->width = width;
@@ -1166,7 +1171,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
- props->weight = 15 * iolink->num_hops_xgmi;
+ props->weight = iolink->weight_xgmi;
else
props->weight = node_distance(id_from, id_to);
@@ -1405,7 +1410,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
}
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
@@ -1524,7 +1529,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -1858,7 +1863,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
}
static int kfd_fill_gpu_memory_affinity(int *avail_size,
- struct kfd_dev *kdev, uint8_t type, uint64_t size,
+ struct kfd_node *kdev, uint8_t type, uint64_t size,
struct crat_subtype_memory *sub_type_hdr,
uint32_t proximity_domain,
const struct kfd_local_mem_info *local_mem_info)
@@ -1887,7 +1892,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
}
#ifdef CONFIG_ACPI_NUMA
-static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
@@ -1972,6 +1977,9 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
}
#endif
+#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13
+#define KFD_CRAT_XGMI_WEIGHT 15
+
/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
* to its NUMA node
* @avail_size: Available size in the memory
@@ -1982,7 +1990,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
* Return 0 if successful else return -ve value
*/
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
- struct kfd_dev *kdev,
+ struct kfd_node *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
@@ -2002,7 +2010,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
/* Fill in IOLINK subtype.
* TODO: Fill-in other fields of iolink subtype
*/
- if (kdev->adev->gmc.xgmi.connected_to_cpu) {
+ if (kdev->adev->gmc.xgmi.connected_to_cpu ||
+ (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
+ kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
+ AMDGPU_PKG_TYPE_APU)) {
+ bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
+ int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
+ KFD_CRAT_INTRA_SOCKET_WEIGHT;
+ uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
+ kdev->adev, NULL, true) : mem_bw;
+
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -2010,14 +2027,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
*/
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
- sub_type_hdr->num_hops_xgmi = 1;
- if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
- sub_type_hdr->minimum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->adev, NULL, true);
- sub_type_hdr->maximum_bandwidth_mbs =
- sub_type_hdr->minimum_bandwidth_mbs;
- }
+ sub_type_hdr->weight_xgmi = weight;
+ sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
+ sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2029,7 +2041,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->proximity_domain_from = proximity_domain;
#ifdef CONFIG_ACPI_NUMA
- if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
+ if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
+ num_possible_nodes() > 1)
kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
@@ -2044,12 +2057,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
}
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
- struct kfd_dev *kdev,
- struct kfd_dev *peer_kdev,
+ struct kfd_node *kdev,
+ struct kfd_node *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
{
+ bool use_ta_info = kdev->kfd->num_nodes == 1;
+
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
return -ENOMEM;
@@ -2064,12 +2079,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
- sub_type_hdr->num_hops_xgmi =
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+
+ if (use_ta_info) {
+ sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
+ sub_type_hdr->maximum_bandwidth_mbs =
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
+ peer_kdev->adev, false);
+ sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+ } else {
+ bool is_single_hop = kdev->kfd == peer_kdev->kfd;
+ int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
+ (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
+ int mem_bw = 819200;
+
+ sub_type_hdr->weight_xgmi = weight;
+ sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+ }
return 0;
}
@@ -2081,7 +2109,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
* [OUT] actual size of data filled in crat_image
*/
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
- size_t *size, struct kfd_dev *kdev,
+ size_t *size, struct kfd_node *kdev,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
@@ -2153,7 +2181,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
- if (!kfd_iommu_check_device(kdev))
+ if (!kfd_iommu_check_device(kdev->kfd))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
crat_table->length += sub_type_hdr->length;
@@ -2216,12 +2244,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (from other GPU to this GPU) will be added
* in kfd_parse_subtype_iolink.
*/
- if (kdev->hive_id) {
+ if (kdev->kfd->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
- if (peer_dev->gpu->hive_id != kdev->hive_id)
+ if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
@@ -2255,12 +2283,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
* -- this option is not currently implemented.
* The assumption is that all AMD APUs will have CRAT
- * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
*
* Return 0 if successful else return -ve value
*/
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
- int flags, struct kfd_dev *kdev,
+ int flags, struct kfd_node *kdev,
uint32_t proximity_domain)
{
void *pcrat_image = NULL;