about | summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c 748
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c 18
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_iommu.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 6
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 4
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c 4
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h 3
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process.c 18
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 9
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c 233
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c 18
15 files changed, 329 insertions, 754 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1294fba0c26..facc28f58c1f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -53,680 +53,242 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
-#ifdef KFD_SUPPORT_IOMMU_V2
-static const struct kfd_device_info kaveri_device_info = {
- .asic_name = "kaveri",
- .gfx_target_version = 70000,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info carrizo_device_info = {
- .asic_name = "carrizo",
- .gfx_target_version = 80001,
- .max_pasid_bits = 16,
- /* max num of queues for CZ.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info raven_device_info = {
- .asic_name = "raven",
- .gfx_target_version = 90002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = true,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-#ifdef CONFIG_DRM_AMDGPU_CIK
-static const struct kfd_device_info hawaii_device_info = {
- .asic_name = "hawaii",
- .gfx_target_version = 70001,
- .max_pasid_bits = 16,
- /* max num of queues for KV.TODO should be a dynamic value */
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-#endif
-
-static const struct kfd_device_info tonga_device_info = {
- .asic_name = "tonga",
- .gfx_target_version = 80002,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = false,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_device_info = {
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info fiji_vf_device_info = {
- .asic_name = "fiji",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-
-static const struct kfd_device_info polaris10_device_info = {
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris10_vf_device_info = {
- .asic_name = "polaris10",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris11_device_info = {
- .asic_name = "polaris11",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info polaris12_device_info = {
- .asic_name = "polaris12",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vegam_device_info = {
- .asic_name = "vegam",
- .gfx_target_version = 80003,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 4,
- .ih_ring_entry_size = 4 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_cik,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_device_info = {
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega10_vf_device_info = {
- .asic_name = "vega10",
- .gfx_target_version = 90000,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega12_device_info = {
- .asic_name = "vega12",
- .gfx_target_version = 90004,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info vega20_device_info = {
- .asic_name = "vega20",
- .gfx_target_version = 90006,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info arcturus_device_info = {
- .asic_name = "arcturus",
- .gfx_target_version = 90008,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info aldebaran_device_info = {
- .asic_name = "aldebaran",
- .gfx_target_version = 90010,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info renoir_device_info = {
- .asic_name = "renoir",
- .gfx_target_version = 90012,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .supports_cwsr = true,
- .needs_iommu_device = false,
- .needs_pci_atomics = false,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info navi10_device_info = {
- .asic_name = "navi10",
- .gfx_target_version = 100100,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi12_device_info = {
- .asic_name = "navi12",
- .gfx_target_version = 100101,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navi14_device_info = {
- .asic_name = "navi14",
- .gfx_target_version = 100102,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 145,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info sienna_cichlid_device_info = {
- .asic_name = "sienna_cichlid",
- .gfx_target_version = 100300,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info navy_flounder_device_info = {
- .asic_name = "navy_flounder",
- .gfx_target_version = 100301,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info vangogh_device_info = {
- .asic_name = "vangogh",
- .gfx_target_version = 100303,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info dimgrey_cavefish_device_info = {
- .asic_name = "dimgrey_cavefish",
- .gfx_target_version = 100302,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info beige_goby_device_info = {
- .asic_name = "beige_goby",
- .gfx_target_version = 100304,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 8,
-};
-
-static const struct kfd_device_info yellow_carp_device_info = {
- .asic_name = "yellow_carp",
- .gfx_target_version = 100305,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .no_atomic_fw_version = 92,
- .num_sdma_queues_per_engine = 2,
-};
-
-static const struct kfd_device_info cyan_skillfish_device_info = {
- .asic_name = "cyan_skillfish",
- .gfx_target_version = 100103,
- .max_pasid_bits = 16,
- .max_no_of_hqd = 24,
- .doorbell_size = 8,
- .ih_ring_entry_size = 8 * sizeof(uint32_t),
- .event_interrupt_class = &event_interrupt_class_v9,
- .num_of_watch_points = 4,
- .mqd_size_aligned = MQD_SIZE_ALIGNED,
- .needs_iommu_device = false,
- .supports_cwsr = true,
- .needs_pci_atomics = true,
- .num_sdma_queues_per_engine = 8,
-};
-
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
+/**
+ * kfd_device_info_init - fill in kfd->device_info for the probed device
+ * @kfd: device being set up; kfd->adev must already be assigned because the
+ *       SDMA IP version and asic_type are read through it
+ * @vf: VF flag handed to kgd2kfd_probe(); on pre-SOC15 parts it keeps
+ *      needs_pci_atomics from being set
+ * @gfx_target_version: value stored in device_info.gfx_target_version
+ *
+ * Derives the per-device settings from the GC and SDMA IP versions (SOC15
+ * parts) or from asic_type (older parts). The boolean fields and
+ * no_atomic_fw_version are only ever assigned non-zero values here, so the
+ * result is only correct if kfd->device_info starts out zeroed.
+ * NOTE(review): presumably guaranteed by how kgd2kfd_probe() allocates kfd;
+ * confirm.
+ */
+static void kfd_device_info_init(struct kfd_dev *kfd,
+				 bool vf, uint32_t gfx_target_version)
+{
+	uint32_t gc_version = KFD_GC_VERSION(kfd);
+	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+	uint32_t asic_type = kfd->adev->asic_type;
+
+	/* Settings shared by every supported device. */
+	kfd->device_info.max_pasid_bits = 16;
+	kfd->device_info.max_no_of_hqd = 24;
+	kfd->device_info.num_of_watch_points = 4;
+	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
+	kfd->device_info.gfx_target_version = gfx_target_version;
+
+	if (KFD_IS_SOC15(kfd)) {
+		kfd->device_info.doorbell_size = 8;
+		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+		kfd->device_info.supports_cwsr = true;
+
+		/*
+		 * SDMA 4.0.0 up to, but not including, 4.2.0, plus SDMA 5.2.1
+		 * and 5.2.3, get 2 queues per engine. SDMA 4.2.0 (Vega20)
+		 * must fall through to 8, matching the vega20_device_info
+		 * table that this function replaces.
+		 */
+		if ((sdma_version >= IP_VERSION(4, 0, 0) &&
+		     sdma_version < IP_VERSION(4, 2, 0)) ||
+		    sdma_version == IP_VERSION(5, 2, 1) ||
+		    sdma_version == IP_VERSION(5, 2, 3))
+			kfd->device_info.num_sdma_queues_per_engine = 2;
+		else
+			kfd->device_info.num_sdma_queues_per_engine = 8;
+
+		/* Raven */
+		if (gc_version == IP_VERSION(9, 1, 0) ||
+		    gc_version == IP_VERSION(9, 2, 2))
+			kfd->device_info.needs_iommu_device = true;
+
+		/*
+		 * NOTE(review): Raven (GC 9.1.0/9.2.2) no longer gets
+		 * needs_pci_atomics, and GC 10.1.3 now gets
+		 * no_atomic_fw_version = 145; the removed per-ASIC tables
+		 * had it the other way around. Confirm this is intended.
+		 */
+		if (gc_version < IP_VERSION(11, 0, 0)) {
+			/* Navi2x+, Navi1x+ */
+			if (gc_version >= IP_VERSION(10, 3, 0))
+				kfd->device_info.no_atomic_fw_version = 92;
+			else if (gc_version >= IP_VERSION(10, 1, 1))
+				kfd->device_info.no_atomic_fw_version = 145;
+
+			/* Navi1x+ */
+			if (gc_version >= IP_VERSION(10, 1, 1))
+				kfd->device_info.needs_pci_atomics = true;
+		}
+	} else {
+		kfd->device_info.doorbell_size = 4;
+		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
+		kfd->device_info.num_sdma_queues_per_engine = 2;
+
+		/* CWSR on every pre-SOC15 part except Kaveri, Hawaii and Tonga. */
+		if (asic_type != CHIP_KAVERI &&
+		    asic_type != CHIP_HAWAII &&
+		    asic_type != CHIP_TONGA)
+			kfd->device_info.supports_cwsr = true;
+
+		if (asic_type == CHIP_KAVERI ||
+		    asic_type == CHIP_CARRIZO)
+			kfd->device_info.needs_iommu_device = true;
+
+		/*
+		 * NOTE(review): this also sets needs_pci_atomics for Kaveri
+		 * and Carrizo, which the removed per-ASIC tables left false.
+		 * Confirm this is intended.
+		 */
+		if (asic_type != CHIP_HAWAII && !vf)
+			kfd->device_info.needs_pci_atomics = true;
+	}
+}
+
struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
- struct kfd_dev *kfd;
- const struct kfd_device_info *device_info;
- const struct kfd2kgd_calls *f2g;
+ struct kfd_dev *kfd = NULL;
+ const struct kfd2kgd_calls *f2g = NULL;
struct pci_dev *pdev = adev->pdev;
+ uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
- if (vf)
- device_info = NULL;
- else
- device_info = &kaveri_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70000;
+ if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_CARRIZO:
- if (vf)
- device_info = NULL;
- else
- device_info = &carrizo_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80001;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
- if (vf)
- device_info = NULL;
- else
- device_info = &hawaii_device_info;
- f2g = &gfx_v7_kfd2kgd;
+ gfx_target_version = 70001;
+ if (!amdgpu_exp_hw_support)
+ pr_info(
+ "KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
+ );
+ else if (!vf)
+ f2g = &gfx_v7_kfd2kgd;
break;
#endif
case CHIP_TONGA:
- if (vf)
- device_info = NULL;
- else
- device_info = &tonga_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80002;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
- if (vf)
- device_info = &fiji_vf_device_info;
- else
- device_info = &fiji_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS10:
- if (vf)
- device_info = &polaris10_vf_device_info;
- else
- device_info = &polaris10_device_info;
+ gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris11_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS12:
- if (vf)
- device_info = NULL;
- else
- device_info = &polaris12_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_VEGAM:
- if (vf)
- device_info = NULL;
- else
- device_info = &vegam_device_info;
- f2g = &gfx_v8_kfd2kgd;
+ gfx_target_version = 80003;
+ if (!vf)
+ f2g = &gfx_v8_kfd2kgd;
break;
default:
switch (adev->ip_versions[GC_HWIP][0]) {
+ /* Vega 10 */
case IP_VERSION(9, 0, 1):
- if (vf)
- device_info = &vega10_vf_device_info;
- else
- device_info = &vega10_device_info;
+ gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
+ /* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
- if (vf)
- device_info = NULL;
- else
- device_info = &raven_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90002;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
#endif
+ /* Vega12 */
case IP_VERSION(9, 2, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega12_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90004;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Renoir */
case IP_VERSION(9, 3, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &renoir_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90012;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Vega20 */
case IP_VERSION(9, 4, 0):
- if (vf)
- device_info = NULL;
- else
- device_info = &vega20_device_info;
- f2g = &gfx_v9_kfd2kgd;
+ gfx_target_version = 90006;
+ if (!vf)
+ f2g = &gfx_v9_kfd2kgd;
break;
+ /* Arcturus */
case IP_VERSION(9, 4, 1):
- device_info = &arcturus_device_info;
+ gfx_target_version = 90008;
f2g = &arcturus_kfd2kgd;
break;
+ /* Aldebaran */
case IP_VERSION(9, 4, 2):
- device_info = &aldebaran_device_info;
+ gfx_target_version = 90010;
f2g = &aldebaran_kfd2kgd;
break;
+ /* Navi10 */
case IP_VERSION(10, 1, 10):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi10_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100100;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi12 */
case IP_VERSION(10, 1, 2):
- device_info = &navi12_device_info;
+ gfx_target_version = 100101;
f2g = &gfx_v10_kfd2kgd;
break;
+ /* Navi14 */
case IP_VERSION(10, 1, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &navi14_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100102;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Cyan Skillfish */
case IP_VERSION(10, 1, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &cyan_skillfish_device_info;
- f2g = &gfx_v10_kfd2kgd;
+ gfx_target_version = 100103;
+ if (!vf)
+ f2g = &gfx_v10_kfd2kgd;
break;
+ /* Sienna Cichlid */
case IP_VERSION(10, 3, 0):
- device_info = &sienna_cichlid_device_info;
+ gfx_target_version = 100300;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Navy Flounder */
case IP_VERSION(10, 3, 2):
- device_info = &navy_flounder_device_info;
+ gfx_target_version = 100301;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Van Gogh */
case IP_VERSION(10, 3, 1):
- if (vf)
- device_info = NULL;
- else
- device_info = &vangogh_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100303;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Dimgrey Cavefish */
case IP_VERSION(10, 3, 4):
- device_info = &dimgrey_cavefish_device_info;
+ gfx_target_version = 100302;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Beige Goby */
case IP_VERSION(10, 3, 5):
- device_info = &beige_goby_device_info;
+ gfx_target_version = 100304;
f2g = &gfx_v10_3_kfd2kgd;
break;
+ /* Yellow Carp */
case IP_VERSION(10, 3, 3):
- if (vf)
- device_info = NULL;
- else
- device_info = &yellow_carp_device_info;
- f2g = &gfx_v10_3_kfd2kgd;
+ gfx_target_version = 100305;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
break;
default:
- return NULL;
+ break;
}
break;
}
- if (!device_info || !f2g) {
+ if (!f2g) {
if (adev->ip_versions[GC_HWIP][0])
dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
@@ -741,7 +303,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
return NULL;
kfd->adev = adev;
- kfd->device_info = device_info;
+ kfd_device_info_init(kfd, vf, gfx_target_version);
kfd->pdev = pdev;
kfd->init_complete = false;
kfd->kfd2kgd = f2g;
@@ -760,7 +322,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
- if (cwsr_enable && kfd->device_info->supports_cwsr) {
+ if (cwsr_enable && kfd->device_info.supports_cwsr) {
if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
@@ -844,14 +406,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
*/
kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
if (!kfd->pci_atomic_requested &&
- kfd->device_info->needs_pci_atomics &&
- (!kfd->device_info->no_atomic_fw_version ||
- kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
+ kfd->device_info.needs_pci_atomics &&
+ (!kfd->device_info.no_atomic_fw_version ||
+ kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics %d<%d\n",
kfd->pdev->vendor, kfd->pdev->device,
kfd->mec_fw_version,
- kfd->device_info->no_atomic_fw_version);
+ kfd->device_info.no_atomic_fw_version);
return false;
}
@@ -868,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
- kfd->device_info->mqd_size_aligned;
+ kfd->device_info.mqd_size_aligned;
/*
* calculate max size of runlist packet.
@@ -1143,7 +705,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (!kfd->init_complete)
return;
- if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+ if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
dev_err_once(kfd_device, "Ring entry too small\n");
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2af2b3268171..dd0b952f0173 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -108,13 +108,13 @@ static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return kfd_get_num_sdma_engines(dqm->dev) *
- dqm->dev->device_info->num_sdma_queues_per_engine;
+ dqm->dev->device_info.num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
- dqm->dev->device_info->num_sdma_queues_per_engine;
+ dqm->dev->device_info.num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -1838,7 +1838,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
get_num_all_sdma_engines(dqm) *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
@@ -2082,7 +2082,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
- queue < dqm->dev->device_info->num_sdma_queues_per_engine;
+ queue < dqm->dev->device_info.num_sdma_queues_per_engine;
queue++) {
r = dqm->dev->kfd2kgd->hqd_sdma_dump(
dqm->dev->adev, pipe, queue, &dump, &n_regs);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 768d153acff4..0dbcf54657ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -48,7 +48,7 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(kfd->device_info->doorbell_size *
+ return roundup(kfd->device_info.doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
@@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- inx *= kfd->device_info->doorbell_size / sizeof(u32);
+ inx *= kfd->device_info.doorbell_size / sizeof(u32);
/*
* Calculating the kernel doorbell offset using the first
@@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
- * sizeof(u32) / kfd->device_info->doorbell_size;
+ * sizeof(u32) / kfd->device_info.doorbell_size;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
@@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
return kfd->doorbell_base_dw_offset +
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
- doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
+ doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 20512a4e9a91..deb64168c9e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -135,7 +135,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
*patched_flag = true;
memcpy(patched_ihre, ih_ring_entry,
- dev->device_info->ih_ring_entry_size);
+ dev->device_info.ih_ring_entry_size);
pasid = dev->dqm->vmid_pasid[vmid];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index bc47f6a44456..81887c2013c9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
int r;
r = kfifo_alloc(&kfd->ih_fifo,
- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+ KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
GFP_KERNEL);
if (r) {
dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
@@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
int count;
count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
- if (count != kfd->device_info->ih_ring_entry_size) {
+ kfd->device_info.ih_ring_entry_size);
+ if (count != kfd->device_info.ih_ring_entry_size) {
dev_err_ratelimited(kfd_chardev(),
"Interrupt ring overflow, dropping interrupt %d\n",
count);
@@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
int count;
count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
+ kfd->device_info.ih_ring_entry_size);
- WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
+ WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);
- return count == kfd->device_info->ih_ring_entry_size;
+ return count == kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
@@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)
interrupt_work);
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
dev_err_once(kfd_chardev(), "Ring entry too small\n");
return;
}
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->event_interrupt_class->interrupt_wq(dev,
+ dev->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
}
@@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
- wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+ wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
ih_ring_entry, patched_ihre, flag);
return wanted != 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 73f2257acc23..66ad8d0b8f7f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(unsigned int,
- (unsigned int)(1 << kfd->device_info->max_pasid_bits),
+ (unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 406479a369a9..16f8bc4ca7f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
@@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq)
void kq_rollback_packet(struct kernel_queue *kq)
{
- if (kq->dev->device_info->doorbell_size == 8) {
+ if (kq->dev->device_info.doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
kq->pending_wptr = *kq->wptr_kernel %
(kq->queue->properties.queue_size / 4);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d84cec0022b1..48c2f2b6e217 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -108,8 +108,8 @@ error_free:
* svm_migrate_copy_memory_gart - sdma copy data between ram and vram
*
* @adev: amdgpu device the sdma ring running
- * @src: source page address array
- * @dst: destination page address array
+ * @sys: system DMA pointer to be copied
+ * @vram: vram destination DMA pointer
* @npages: number of pages to copy
* @direction: enum MIGRATION_COPY_DIR
* @mfence: output, sdma fence to signal after sdma is done
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 7b4118915bf6..e2825ad4d699 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
return NULL;
offset = (q->sdma_engine_id *
- dev->device_info->num_sdma_queues_per_engine +
+ dev->device_info.num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 08442e7d9944..3c0658e32e93 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -110,8 +110,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
- struct scheduling_resources *res)
+static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
{
struct pm4_mes_set_resources *packet;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7ea528941951..0c3f911e3bf4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -195,7 +195,6 @@ struct kfd_event_interrupt_class {
};
struct kfd_device_info {
- const char *asic_name;
uint32_t gfx_target_version;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
@@ -231,7 +230,7 @@ struct kfd_vmid_info {
struct kfd_dev {
struct amdgpu_device *adev;
- const struct kfd_device_info *device_info;
+ struct kfd_device_info device_info;
struct pci_dev *pdev;
struct drm_device *ddev;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4c8a6948a9f..f1930ff2c74a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -251,14 +251,13 @@ cleanup:
}
/**
- * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
+ * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
* by current process. Translates acquired wave count into number of compute units
* that are occupied.
*
- * @atr: Handle of attribute that allows reporting of wave count. The attribute
+ * @attr: Handle of attribute that allows reporting of wave count. The attribute
* handle encapsulates GPU device it is associated with, thereby allowing collection
* of waves in flight, etc
- *
* @buffer: Handle of user provided buffer updated with wave count
*
* Return: Number of bytes written to user buffer or an error value
@@ -1011,7 +1010,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- kfree(pdd->qpd.doorbell_bitmap);
+ bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
@@ -1433,9 +1432,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
if (!KFD_IS_SOC15(dev))
return 0;
- qpd->doorbell_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!qpd->doorbell_bitmap)
return -ENOMEM;
@@ -1447,9 +1445,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= range_start && i <= range_end) {
- set_bit(i, qpd->doorbell_bitmap);
- set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- qpd->doorbell_bitmap);
+ __set_bit(i, qpd->doorbell_bitmap);
+ __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ qpd->doorbell_bitmap);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 4f8464658daf..5e5c84a8e1ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -135,9 +135,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
INIT_LIST_HEAD(&pqm->queues);
- pqm->queue_slot_bitmap =
- kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- BITS_PER_BYTE), GFP_KERNEL);
+ pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
if (!pqm->queue_slot_bitmap)
return -ENOMEM;
pqm->process = p;
@@ -159,7 +158,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
kfree(pqn);
}
- kfree(pqm->queue_slot_bitmap);
+ bitmap_free(pqm->queue_slot_bitmap);
pqm->queue_slot_bitmap = NULL;
}
@@ -220,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
- max_queues = dev->device_info->max_no_of_hqd/2;
+ max_queues = dev->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 755265f6c53b..7e92dcea4ce8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -704,6 +704,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
}
}
+static bool
+svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+ uint32_t i;
+ int gpuidx;
+
+ for (i = 0; i < nattr; i++) {
+ switch (attrs[i].type) {
+ case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+ if (prange->preferred_loc != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+ /* Prefetch should always trigger a migration even
+ * if the value of the attribute didn't change.
+ */
+ return false;
+ case KFD_IOCTL_SVM_ATTR_ACCESS:
+ case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ attrs[i].value);
+ if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+ if (test_bit(gpuidx, prange->bitmap_access) ||
+ test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+ if (!test_bit(gpuidx, prange->bitmap_access))
+ return false;
+ } else {
+ if (!test_bit(gpuidx, prange->bitmap_aip))
+ return false;
+ }
+ break;
+ case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+ if ((prange->flags & attrs[i].value) != attrs[i].value)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+ if ((prange->flags & attrs[i].value) != 0)
+ return false;
+ break;
+ case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+ if (prange->granularity != attrs[i].value)
+ return false;
+ break;
+ default:
+ WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+ }
+ }
+
+ return true;
+}
+
/**
* svm_range_debug_dump - print all range information from svms
* @svms: svm range list header
@@ -741,14 +796,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
-static bool
-svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
-{
- return (old->prefetch_loc == new->prefetch_loc &&
- old->flags == new->flags &&
- old->granularity == new->granularity);
-}
-
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -941,7 +988,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+svm_range_split_tail(struct svm_range *prange,
uint64_t new_last, struct list_head *insert_list)
{
struct svm_range *tail;
@@ -953,7 +1000,7 @@ svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
}
static int
-svm_range_split_head(struct svm_range *prange, struct svm_range *new,
+svm_range_split_head(struct svm_range *prange,
uint64_t new_start, struct list_head *insert_list)
{
struct svm_range *head;
@@ -1169,7 +1216,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned long npages, bool readonly, dma_addr_t *dma_addr,
struct amdgpu_device *bo_adev, struct dma_fence **fence)
{
- struct amdgpu_bo_va bo_va;
bool table_freed = false;
uint64_t pte_flags;
unsigned long last_start;
@@ -1182,9 +1228,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
last_start, last_start + npages - 1, readonly);
- if (prange->svm_bo && prange->ttm_res)
- bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
-
for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
@@ -1650,6 +1693,10 @@ out_reschedule:
/**
* svm_range_evict - evict svm range
+ * @prange: svm range structure
+ * @mm: current process mm_struct
+ * @start: starting process queue number
+ * @last: last process queue number
*
* Stop all queues of the process to ensure GPU doesn't access the memory, then
* return to let CPU evict the buffer and proceed CPU pagetable update.
@@ -1754,46 +1801,49 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
}
/**
- * svm_range_handle_overlap - split overlap ranges
- * @svms: svm range list header
- * @new: range added with this attributes
- * @start: range added start address, in pages
- * @last: range last address, in pages
- * @update_list: output, the ranges attributes are updated. For set_attr, this
- * will do validation and map to GPUs. For unmap, this will be
- * removed and unmap from GPUs
- * @insert_list: output, the ranges will be inserted into svms, attributes are
- * not changes. For set_attr, this will add into svms.
- * @remove_list:output, the ranges will be removed from svms
- * @left: the remaining range after overlap, For set_attr, this will be added
- * as new range.
+ * svm_range_add - add svm range and handle overlap
+ * @p: the process whose svms the range is added to
+ * @start: page size aligned
+ * @size: page size aligned
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, ranges that need validation and a GPU mapping update
+ * @insert_list: output, ranges that need to be inserted into svms
+ * @remove_list: output, replaced ranges that need to be removed from svms
*
- * Total have 5 overlap cases.
+ * Check if the virtual address range has overlap with any existing ranges,
+ * split partly overlapping ranges and add new ranges in the gaps. All changes
+ * should be applied to the range_list and interval tree transactionally. If
+ * any range split or allocation fails, the entire update fails. Therefore any
+ * existing overlapping svm_ranges are cloned and the original svm_ranges left
+ * unchanged.
*
- * This function handles overlap of an address interval with existing
- * struct svm_ranges for applying new attributes. This may require
- * splitting existing struct svm_ranges. All changes should be applied to
- * the range_list and interval tree transactionally. If any split operation
- * fails, the entire update fails. Therefore the existing overlapping
- * svm_ranges are cloned and the original svm_ranges left unchanged. If the
- * transaction succeeds, the modified clones are added and the originals
- * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ * If the transaction succeeds, the caller can update and insert clones and
+ * new ranges, then free the originals.
*
- * Context: The caller must hold svms->lock
+ * Otherwise the caller can free the clones and new ranges, while the old
+ * svm_ranges remain unchanged.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
*/
static int
-svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
- unsigned long start, unsigned long last,
- struct list_head *update_list,
- struct list_head *insert_list,
- struct list_head *remove_list,
- unsigned long *left)
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+ uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+ struct list_head *update_list, struct list_head *insert_list,
+ struct list_head *remove_list)
{
+ unsigned long last = start + size - 1UL;
+ struct svm_range_list *svms = &p->svms;
struct interval_tree_node *node;
struct svm_range *prange;
struct svm_range *tmp;
int r = 0;
+ pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
+
INIT_LIST_HEAD(update_list);
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
@@ -1801,18 +1851,24 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
struct interval_tree_node *next;
- struct svm_range *old;
unsigned long next_start;
pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
node->last);
- old = container_of(node, struct svm_range, it_node);
+ prange = container_of(node, struct svm_range, it_node);
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (node->start < start || node->last > last) {
- /* node intersects the updated range, clone+split it */
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+ /* nothing to do */
+ } else if (node->start < start || node->last > last) {
+ /* node intersects the update range and its attributes
+ * will change. Clone and split it, apply updates only
+ * to the overlapping part
+ */
+ struct svm_range *old = prange;
+
prange = svm_range_clone(old);
if (!prange) {
r = -ENOMEM;
@@ -1821,17 +1877,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
list_add(&old->remove_list, remove_list);
list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
if (node->start < start) {
pr_debug("change old range start\n");
- r = svm_range_split_head(prange, new, start,
+ r = svm_range_split_head(prange, start,
insert_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
- r = svm_range_split_tail(prange, new, last,
+ r = svm_range_split_tail(prange, last,
insert_list);
if (r)
goto out;
@@ -1840,16 +1897,12 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
/* The node is contained within start..last,
* just update it
*/
- prange = old;
- }
-
- if (!svm_range_is_same_attrs(prange, new))
list_add(&prange->update_list, update_list);
+ }
/* insert a new node if needed */
if (node->start > start) {
- prange = svm_range_new(prange->svms, start,
- node->start - 1);
+ prange = svm_range_new(svms, start, node->start - 1);
if (!prange) {
r = -ENOMEM;
goto out;
@@ -1863,8 +1916,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
start = next_start;
}
- if (left && start <= last)
- *left = last - start + 1;
+ /* add a final range at the end if needed */
+ if (start <= last) {
+ prange = svm_range_new(svms, start, last);
+ if (!prange) {
+ r = -ENOMEM;
+ goto out;
+ }
+ list_add(&prange->insert_list, insert_list);
+ list_add(&prange->update_list, update_list);
+ }
out:
if (r)
@@ -1970,7 +2031,7 @@ restart:
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
- amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev,
+ amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
&pdd->dev->adev->irq.ih1);
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
@@ -2161,6 +2222,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
/**
* svm_range_cpu_invalidate_pagetables - interval notifier callback
+ * @mni: mmu_interval_notifier struct
+ * @range: mmu_notifier_range struct
+ * @cur_seq: value to pass to mmu_interval_set_seq()
*
* If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it
* is from migration, or CPU page invalidation callback.
@@ -2884,59 +2948,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
}
/**
- * svm_range_add - add svm range and handle overlap
- * @p: the range add to this process svms
- * @start: page size aligned
- * @size: page size aligned
- * @nattr: number of attributes
- * @attrs: array of attributes
- * @update_list: output, the ranges need validate and update GPU mapping
- * @insert_list: output, the ranges need insert to svms
- * @remove_list: output, the ranges are replaced and need remove from svms
- *
- * Check if the virtual address range has overlap with the registered ranges,
- * split the overlapped range, copy and adjust pages address and vram nodes in
- * old and new ranges.
- *
- * Context: Process context, caller must hold svms->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-static int
-svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
- uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
- struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
-{
- uint64_t last = start + size - 1UL;
- struct svm_range_list *svms;
- struct svm_range new = {0};
- struct svm_range *prange;
- unsigned long left = 0;
- int r = 0;
-
- pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
-
- svm_range_apply_attrs(p, &new, nattr, attrs);
-
- svms = &p->svms;
-
- r = svm_range_handle_overlap(svms, &new, start, last, update_list,
- insert_list, remove_list, &left);
- if (r)
- return r;
-
- if (left) {
- prange = svm_range_new(svms, last - left + 1, last);
- list_add(&prange->insert_list, insert_list);
- list_add(&prange->update_list, update_list);
- }
-
- return 0;
-}
-
-/**
* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 2d44b26b6657..948fbb39336e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
if (dev->gpu) {
log_max_watch_addr =
- __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+ __ilog2_u32(dev->gpu->device_info.num_of_watch_points);
if (log_max_watch_addr) {
dev->node_props.capability |=
@@ -1285,6 +1285,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
void *crat_image = NULL;
size_t image_size = 0;
int proximity_domain;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
INIT_LIST_HEAD(&temp_topology_device_list);
@@ -1370,13 +1372,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
- strncpy(dev->node_props.name, gpu->device_info->asic_name,
- KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+ if (asic_name[i] == '\0')
+ break;
+ }
+ dev->node_props.name[i] = '\0';
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
- dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version;
+ dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
dev->node_props.vendor_id = gpu->pdev->vendor;
dev->node_props.device_id = gpu->pdev->device;
dev->node_props.capability |=
@@ -1396,7 +1402,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.num_sdma_xgmi_engines =
kfd_get_num_xgmi_sdma_engines(gpu);
dev->node_props.num_sdma_queues_per_engine =
- gpu->device_info->num_sdma_queues_per_engine;
+ gpu->device_info.num_sdma_queues_per_engine;
dev->node_props.num_gws = (dev->gpu->gws &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
dev->gpu->adev->gds.gws_size : 0;
@@ -1572,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
gpu->use_iommu_v2 = false;
- if (!gpu->device_info->needs_iommu_device)
+ if (!gpu->device_info.needs_iommu_device)
return;
down_read(&topology_lock);