diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 409 |
1 files changed, 225 insertions, 184 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8e4124dcb6e4..c8c75ff7cea8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -36,8 +36,8 @@ #include "kfd_crat.h" #include "kfd_topology.h" #include "kfd_device_queue_manager.h" -#include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_debug.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" #include "amdgpu.h" @@ -96,7 +96,7 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id) return ret; } -struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) +struct kfd_node *kfd_device_by_id(uint32_t gpu_id) { struct kfd_topology_device *top_dev; @@ -107,10 +107,10 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) return top_dev->gpu; } -struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) +struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; + struct kfd_node *device = NULL; down_read(&topology_lock); @@ -125,24 +125,6 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev) -{ - struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; - - down_read(&topology_lock); - - list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu && top_dev->gpu->adev == adev) { - device = top_dev->gpu; - break; - } - - up_read(&topology_lock); - - return device; -} - /* Called with write topology_lock acquired */ static void kfd_release_topology_device(struct kfd_topology_device *dev) { @@ -492,7 +474,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "wave_front_size", dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, offs, "array_count", - dev->node_props.array_count); + dev->gpu ? (dev->node_props.array_count * + NUM_XCC(dev->gpu->xcc_mask)) : 0); sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine", dev->node_props.simd_arrays_per_engine); sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array", @@ -526,7 +509,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (dev->gpu) { log_max_watch_addr = - __ilog2_u32(dev->gpu->device_info.num_of_watch_points); + __ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points); if (log_max_watch_addr) { dev->node_props.capability |= @@ -548,14 +531,17 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL); sysfs_show_32bit_prop(buffer, offs, "fw_version", - dev->gpu->mec_fw_version); + dev->gpu->kfd->mec_fw_version); sysfs_show_32bit_prop(buffer, offs, "capability", dev->node_props.capability); + sysfs_show_64bit_prop(buffer, offs, "debug_prop", + dev->node_props.debug_prop); sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", - dev->gpu->sdma_fw_version); + dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", dev->gpu->adev->unique_id); - + sysfs_show_32bit_prop(buffer, offs, "num_xcc", + NUM_XCC(dev->gpu->xcc_mask)); } return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute", @@ -1001,17 +987,6 @@ static void find_system_memory(const struct dmi_header *dm, } } -/* - * Performance counters information is not part of CRAT but we would like to - * put them in the sysfs under topology directory for Thunk to get the data. - * This function is called before updating the sysfs. - */ -static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) -{ - /* These are the only counters supported so far */ - return kfd_iommu_add_perf_counters(kdev); -} - /* kfd_add_non_crat_information - Add information that is not currently * defined in CRAT but is necessary for KFD topology * @dev - topology device to which addition info is added @@ -1026,25 +1001,6 @@ static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ } -/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. - * Ignore CRAT for all other devices. AMD APU is identified if both CPU - * and GPU cores are present. - * @device_list - topology device list created by parsing ACPI CRAT table. - * @return - TRUE if invalid, FALSE is valid. - */ -static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) -{ - struct kfd_topology_device *dev; - - list_for_each_entry(dev, device_list, list) { - if (dev->node_props.cpu_cores_count && - dev->node_props.simd_count) - return false; - } - pr_info("Ignoring ACPI CRAT on non-APU system\n"); - return true; -} - int kfd_topology_init(void) { void *crat_image = NULL; @@ -1075,48 +1031,25 @@ int kfd_topology_init(void) */ proximity_domain = 0; - /* - * Get the CRAT image from the ACPI. If ACPI doesn't have one - * or if ACPI CRAT is invalid create a virtual CRAT. - * NOTE: The current implementation expects all AMD APUs to have - * CRAT. If no CRAT is available, it is assumed to be a CPU - */ - ret = kfd_create_crat_image_acpi(&crat_image, &image_size); - if (!ret) { - ret = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); - if (ret || - kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { - kfd_release_topology_device_list( - &temp_topology_device_list); - kfd_destroy_crat_image(crat_image); - crat_image = NULL; - } + ret = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_CPU, NULL, + proximity_domain); + cpu_only_node = 1; + if (ret) { + pr_err("Error creating VCRAT table for CPU\n"); + return ret; } - if (!crat_image) { - ret = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_CPU, NULL, - proximity_domain); - cpu_only_node = 1; - if (ret) { - pr_err("Error creating VCRAT table for CPU\n"); - return ret; - } - - ret = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); - if (ret) { - pr_err("Error parsing VCRAT table for CPU\n"); - goto err; - } + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret) { + pr_err("Error parsing VCRAT table for CPU\n"); + goto err; } kdev = list_first_entry(&temp_topology_device_list, struct kfd_topology_device, list); - kfd_add_perf_to_topology(kdev); down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, @@ -1157,10 +1090,10 @@ void kfd_topology_shutdown(void) up_write(&topology_lock); } -static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) +static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) { uint32_t hashout; - uint32_t buf[7]; + uint32_t buf[8]; uint64_t local_mem_size; int i; @@ -1177,8 +1110,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) buf[4] = gpu->adev->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); buf[6] = upper_32_bits(local_mem_size); + buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); - for (i = 0, hashout = 0; i < 7; i++) + for (i = 0, hashout = 0; i < 8; i++) hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); return hashout; @@ -1188,7 +1122,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) * list then return NULL. This means a new topology device has to * be created for this GPU. */ -static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) +static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; @@ -1201,8 +1135,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) /* Discrete GPUs need their own topology device list * entries. Don't assign them to CPU/APU nodes. */ - if (!gpu->use_iommu_v2 && - dev->node_props.cpu_cores_count) + if (dev->node_props.cpu_cores_count) continue; if (!dev->gpu && (dev->node_props.simd_count > 0)) { @@ -1248,7 +1181,8 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info, + dev->gpu->xcp); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1275,7 +1209,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; /* set gpu (dev) flags. */ } else { - if (!dev->gpu->pci_atomic_requested || + if (!dev->gpu->kfd->pci_atomic_requested || dev->gpu->adev->asic_type == CHIP_HAWAII) link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; @@ -1323,10 +1257,16 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) continue; /* Include the CPU peer in GPU hive if connected over xGMI. */ - if (!peer_dev->gpu && !peer_dev->node_props.hive_id && - dev->node_props.hive_id && - dev->gpu->adev->gmc.xgmi.connected_to_cpu) + if (!peer_dev->gpu && + link->iolink_type == CRAT_IOLINK_TYPE_XGMI) { + /* + * If the GPU is not part of a GPU hive, use its pci + * device location as the hive ID to bind with the CPU. + */ + if (!dev->node_props.hive_id) + dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev); peer_dev->node_props.hive_id = dev->node_props.hive_id; + } list_for_each_entry(inbound_link, &peer_dev->io_link_props, list) { @@ -1569,8 +1509,8 @@ static int kfd_dev_create_p2p_links(void) if (dev == new_dev) break; if (!dev->gpu || !dev->gpu->adev || - (dev->gpu->hive_id && - dev->gpu->hive_id == new_dev->gpu->hive_id)) + (dev->gpu->kfd->hive_id && + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) goto next; /* check if node(s) is/are peer accessible in one direction or bi-direction */ @@ -1590,7 +1530,6 @@ out: return ret; } - /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, @@ -1657,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, struct kfd_cu_info *cu_info, - int cache_type, unsigned int cu_processor_id) + int cache_type, unsigned int cu_processor_id, + struct kfd_node *knode) { unsigned int cu_sibling_map_mask; int first_active_cu; - int i, j, k; + int i, j, k, xcc, start, end; struct kfd_cache_properties *pcache = NULL; - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + start = ffs(knode->xcc_mask) - 1; + end = start + NUM_XCC(knode->xcc_mask); + cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1699,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); k = 0; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; - - cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; - cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + + cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } } } pcache->sibling_map_size = k; @@ -1723,10 +1667,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, /* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info * tables */ -static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) +static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) { struct kfd_gpu_cache_info *pcache_info = NULL; - int i, j, k; + int i, j, k, xcc, start, end; int ct = 0; unsigned int cu_processor_id; int ret; @@ -1760,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct * then it will consider only one CU from * the shared unit */ + start = ffs(kdev->xcc_mask) - 1; + end = start + NUM_XCC(kdev->xcc_mask); + for (ct = 0; ct < num_of_cache_types; ct++) { cu_processor_id = gpu_processor_id; if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < pcu_info->num_shader_engines; i++) { - for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { - ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[i % 4][j + i / 4], ct, + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, cu_processor_id, k); - if (ret < 0) - break; + if (ret < 0) + break; - if (!ret) { - num_of_entries++; - list_add_tail(&props_ext->list, &dev->cache_props); - } + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - pcu_info->num_cu_per_sh) ? - pcache_info[ct].num_cu_shared : - (pcu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; + } } } } } else { ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, - pcu_info, ct, cu_processor_id); + pcu_info, ct, cu_processor_id, kdev); if (ret < 0) break; @@ -1805,7 +1754,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct pr_debug("Added [%d] GPU cache entries\n", num_of_entries); } -static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id, +static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id, struct kfd_topology_device **dev) { int proximity_domain = ++topology_crat_proximity_domain; @@ -1865,7 +1814,107 @@ err: return res; } -int kfd_topology_add_device(struct kfd_dev *gpu) +static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev) +{ + bool firmware_supported = true; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) { + uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version & + AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT; + uint32_t mes_rev = dev->gpu->adev->mes.sched_version & + AMDGPU_MES_VERSION_MASK; + + firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64); + goto out; + } + + /* + * Note: Any unlisted devices here are assumed to support exception handling. + * Add additional checks here as needed. + */ + switch (KFD_GC_VERSION(dev->gpu)) { + case IP_VERSION(9, 0, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768; + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 3, 0): + case IP_VERSION(9, 4, 0): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 459; + break; + case IP_VERSION(9, 4, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 60; + break; + case IP_VERSION(9, 4, 2): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 51; + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 1): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 144; + break; + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 1): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + firmware_supported = dev->gpu->kfd->mec_fw_version >= 89; + break; + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 3, 3): + firmware_supported = false; + break; + default: + break; + } + +out: + if (firmware_supported) + dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED; +} + +static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) +{ + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + + dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT | + HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | + HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; + + if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) + dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; + + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { + if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) + dev->node_props.debug_prop |= + HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3; + else + dev->node_props.debug_prop |= + HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) + dev->node_props.capability |= + HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + } else { + dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) + dev->node_props.capability |= + HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + } + + kfd_topology_set_dbg_firmware_support(dev); +} + +int kfd_topology_add_device(struct kfd_node *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; @@ -1875,7 +1924,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu) const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type]; gpu_id = kfd_generate_gpu_id(gpu); - pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + if (gpu->xcp && !gpu->xcp->ddev) { + dev_warn(gpu->adev->dev, + "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.", + gpu_id); + return 0; + } else { + pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + } /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, @@ -1916,28 +1972,37 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; - dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version; + dev->node_props.gfx_target_version = + gpu->kfd->device_info.gfx_target_version; dev->node_props.vendor_id = gpu->adev->pdev->vendor; dev->node_props.device_id = gpu->adev->pdev->device; dev->node_props.capability |= ((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) & HSA_CAP_ASIC_REVISION_MASK); + dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); + if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + dev->node_props.location_id |= dev->gpu->node_id; + dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; - dev->node_props.drm_render_minor = - gpu->shared_resources.drm_render_minor; - dev->node_props.hive_id = gpu->hive_id; + if (gpu->xcp) + dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index; + else + dev->node_props.drm_render_minor = + gpu->kfd->shared_resources.drm_render_minor; + + dev->node_props.hive_id = gpu->kfd->hive_id; dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu); dev->node_props.num_sdma_xgmi_engines = kfd_get_num_xgmi_sdma_engines(gpu); dev->node_props.num_sdma_queues_per_engine = - gpu->device_info.num_sdma_queues_per_engine - - gpu->device_info.num_reserved_sdma_queues_per_engine; + gpu->kfd->device_info.num_sdma_queues_per_engine - + gpu->kfd->device_info.num_reserved_sdma_queues_per_engine; dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? dev->gpu->adev->gds.gws_size : 0; @@ -1966,23 +2031,18 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; default: - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) - dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << - HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & - HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); - else + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1)) WARN(1, "Unexpected ASIC family %u", dev->gpu->adev->asic_type); + else + kfd_topology_set_capabilities(dev); } /* * Overwrite ATS capability according to needs_iommu_device to fix * potential missing corresponding bit in CRAT of BIOS. */ - if (dev->gpu->use_iommu_v2) - dev->node_props.capability |= HSA_CAP_ATS_PRESENT; - else - dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; + dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT; /* Fix errors in CZ CRAT. * simd_count: Carrizo CRAT reports wrong simd_count, probably @@ -2007,9 +2067,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; - if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev)) + if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; + if (dev->gpu->adev->gmc.is_app_apu || + dev->gpu->adev->gmc.xgmi.connected_to_cpu) + dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); @@ -2079,7 +2143,7 @@ static void kfd_topology_update_io_links(int proximity_domain) } } -int kfd_topology_remove_device(struct kfd_dev *gpu) +int kfd_topology_remove_device(struct kfd_node *gpu) { struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; @@ -2119,7 +2183,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) * Return - 0: On success (@kdev will be NULL for non GPU nodes) * -1: If end of list */ -int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev) { struct kfd_topology_device *top_dev; @@ -2173,29 +2237,6 @@ int kfd_numa_node_to_apic_id(int numa_node_id) return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); } -void kfd_double_confirm_iommu_support(struct kfd_dev *gpu) -{ - struct kfd_topology_device *dev; - - gpu->use_iommu_v2 = false; - - if (!gpu->device_info.needs_iommu_device) - return; - - down_read(&topology_lock); - - /* Only use IOMMUv2 if there is an APU topology node with no GPU - * assigned yet. This GPU will be assigned to it. - */ - list_for_each_entry(dev, &topology_device_list, list) - if (dev->node_props.cpu_cores_count && - dev->node_props.simd_count && - !dev->gpu) - gpu->use_iommu_v2 = true; - - up_read(&topology_lock); -} - #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) |