diff options
author | Mukul Joshi <mukul.joshi@amd.com> | 2023-08-25 11:59:09 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-09-11 17:10:19 -0400 |
commit | f705a6f021b1ab1d2dd0f82e3d369e0feeb2b123 (patch) | |
tree | a19744ef4386b57b32cec059b77ccfd4105a4570 /drivers/gpu/drm/amd/amdkfd | |
parent | a1ce3e1f7c16c183486b108d80138b8f4cba1dc4 (diff) |
drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3
Currently, we store CU info for only a single XCC, assuming
that it is the same for all XCCs. However, that may not be
true. Therefore, store CU info for all XCCs. This info is
later used for CU masking.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 11 |
3 files changed, 12 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2e9612cf56ae..950810bb5c71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2088,7 +2088,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; - cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->num_simd_cores = cu_info.simd_per_cu * + (cu_info.cu_active_number / kdev->kfd->num_nodes); cu->max_waves_simd = cu_info.max_waves_per_simd; cu->wave_front_size = cu_info.wave_front_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index d01bb57733b3..763966236658 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -104,11 +104,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + uint32_t cu_active_per_node; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); - if (cu_mask_count > cu_info.cu_active_number) - cu_mask_count = cu_info.cu_active_number; + cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes; + if (cu_mask_count > cu_active_per_node) + cu_mask_count = cu_active_per_node; /* Exceeding these bounds corrupts the stack and indicates a coding error. 
* Returning with no CU's enabled will hang the queue, which should be @@ -141,7 +143,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); /* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ff98fded9534..c54795682dfb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", - dev->gpu ? (dev->node_props.simd_count * - NUM_XCC(dev->gpu->xcc_mask)) : 0); + dev->gpu ? 
dev->node_props.simd_count : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -1604,7 +1603,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, int i, j, k; struct kfd_cache_properties *pcache = NULL; - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1647,7 +1646,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); k += 4; - cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); } } @@ -1708,8 +1707,8 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[i % 4][j + i / 4], ct, - cu_processor_id, k); + pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, + cu_processor_id, k); if (ret < 0) break; |