drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3

Currently, we store CU info only for a single XCC assuming that it is the same for all XCCs. However, that may not be true. As a result, store CU info for all XCCs. This info is later used for CU masking. Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Mukul Joshi <mukul.joshi@amd.com> 2023-08-25 11:59:09 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2023-09-11 17:10:19 -0400
commit: f705a6f021b1ab1d2dd0f82e3d369e0feeb2b123 (patch)
tree: a19744ef4386b57b32cec059b77ccfd4105a4570 /drivers/gpu/drm/amd/amdkfd
parent: a1ce3e1f7c16c183486b108d80138b8f4cba1dc4 (diff)
3 files changed, 12 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2e9612cf56ae..950810bb5c71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -2088,7 +2088,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 
 	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
 	cu->num_simd_per_cu = cu_info.simd_per_cu;
-	cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
+	cu->num_simd_cores = cu_info.simd_per_cu *
+			(cu_info.cu_active_number / kdev->kfd->num_nodes);
 	cu->max_waves_simd = cu_info.max_waves_per_simd;
 
 	cu->wave_front_size = cu_info.wave_front_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index d01bb57733b3..763966236658 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -104,11 +104,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 	bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
 	uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
 	int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
+	uint32_t cu_active_per_node;
 
 	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
 
-	if (cu_mask_count > cu_info.cu_active_number)
-		cu_mask_count = cu_info.cu_active_number;
+	cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
+	if (cu_mask_count > cu_active_per_node)
+		cu_mask_count = cu_active_per_node;
 
 	/* Exceeding these bounds corrupts the stack and indicates a coding error.
 	 * Returning with no CU's enabled will hang the queue, which should be
@@ -141,7 +143,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 	for (se = 0; se < cu_info.num_shader_engines; se++)
 		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
 			cu_per_sh[se][sh] = hweight32(
-				cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
+				cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
 
 	/* Symmetrically map cu_mask to all SEs & SHs:
 	 * se_mask programs up to 2 SH in the upper and lower 16 bits.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ff98fded9534..c54795682dfb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 	sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
 			      dev->node_props.cpu_cores_count);
 	sysfs_show_32bit_prop(buffer, offs, "simd_count",
-			      dev->gpu ? (dev->node_props.simd_count *
-					  NUM_XCC(dev->gpu->xcc_mask)) : 0);
+			      dev->gpu ? dev->node_props.simd_count : 0);
 	sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
 			      dev->node_props.mem_banks_count);
 	sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -1604,7 +1603,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 	int i, j, k;
 	struct kfd_cache_properties *pcache = NULL;
 
-	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+	cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
 	cu_sibling_map_mask &=
 		((1 << pcache_info[cache_type].num_cu_shared) - 1);
 	first_active_cu = ffs(cu_sibling_map_mask);
@@ -1647,7 +1646,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 				pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
 				k += 4;
 
-				cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
+				cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
 				cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
 			}
 		}
@@ -1708,8 +1707,8 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
 					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
 
 						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
-										pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
-										cu_processor_id, k);
+									pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
+									cu_processor_id, k);
 
 						if (ret < 0)
 							break;
author	Mukul Joshi <mukul.joshi@amd.com>	2023-08-25 11:59:09 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2023-09-11 17:10:19 -0400
commit	f705a6f021b1ab1d2dd0f82e3d369e0feeb2b123 (patch)
tree	a19744ef4386b57b32cec059b77ccfd4105a4570 /drivers/gpu/drm/amd/amdkfd
parent	a1ce3e1f7c16c183486b108d80138b8f4cba1dc4 (diff)