Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 159 |
1 file changed, 119 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index be4629cdac04..c02659025656 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -620,10 +620,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	int r;
 
 	if (!hub->sdma_invalidation_workaround || vmid ||
-	    !adev->mman.buffer_funcs_enabled ||
-	    !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
+	    !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
 	    !ring->sched.ready) {
-
 		/*
 		 * A GPU reset should flush all TLBs anyway, so no need to do
 		 * this while one is ongoing.
@@ -684,12 +682,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
 	unsigned int ndw;
-	signed long r;
+	int r;
 	uint32_t seq;
 
-	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
-	    !down_read_trylock(&adev->reset_domain->sem)) {
+	/*
+	 * A GPU reset should flush all TLBs anyway, so no need to do
+	 * this while one is ongoing.
+	 */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return 0;
+	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
 		if (adev->gmc.flush_tlb_needs_extra_type_2)
 			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
 								 2, all_hub,
								 inst);
@@ -703,43 +706,44 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
 		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
 							 flush_type, all_hub,
 							 inst);
-		return 0;
-	}
+		r = 0;
+	} else {
+		/* 2 dwords flush + 8 dwords fence */
+		ndw = kiq->pmf->invalidate_tlbs_size + 8;
 
-	/* 2 dwords flush + 8 dwords fence */
-	ndw = kiq->pmf->invalidate_tlbs_size + 8;
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			ndw += kiq->pmf->invalidate_tlbs_size;
 
-	if (adev->gmc.flush_tlb_needs_extra_type_2)
-		ndw += kiq->pmf->invalidate_tlbs_size;
+		if (adev->gmc.flush_tlb_needs_extra_type_0)
+			ndw += kiq->pmf->invalidate_tlbs_size;
 
-	if (adev->gmc.flush_tlb_needs_extra_type_0)
-		ndw += kiq->pmf->invalidate_tlbs_size;
+		spin_lock(&adev->gfx.kiq[inst].ring_lock);
+		r = amdgpu_ring_alloc(ring, ndw);
+		if (r) {
+			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+			goto error_unlock_reset;
+		}
+		if (adev->gmc.flush_tlb_needs_extra_type_2)
+			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
 
-	spin_lock(&adev->gfx.kiq[inst].ring_lock);
-	amdgpu_ring_alloc(ring, ndw);
-	if (adev->gmc.flush_tlb_needs_extra_type_2)
-		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+		if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
 
-	if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
-		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+		if (r) {
+			amdgpu_ring_undo(ring);
+			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+			goto error_unlock_reset;
+		}
 
-	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
-	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-	if (r) {
-		amdgpu_ring_undo(ring);
+		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-		goto error_unlock_reset;
-	}
-
-	amdgpu_ring_commit(ring);
-	spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-	r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-	if (r < 1) {
-		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-		r = -ETIME;
-		goto error_unlock_reset;
+		if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
+			dev_err(adev->dev, "timeout waiting for kiq fence\n");
+			r = -ETIME;
+		}
 	}
 
-	r = 0;
 error_unlock_reset:
 	up_read(&adev->reset_domain->sem);
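The reworked amdgpu_gmc_flush_gpu_tlb_pasid() above follows a submit-and-poll shape: take the reset-domain semaphore in shared mode (a GPU reset flushes all TLBs anyway, so bailing out early is safe), reserve ring space, emit the invalidate and fence packets under the ring lock, then busy-poll the fence. The restructure also makes amdgpu_ring_alloc() failures unwind the locks instead of being ignored. Below is a minimal userspace sketch of that control flow only; every type and function in it is an illustrative stand-in, not the real amdgpu ring/fence API:

	/* Sketch of the trylock + alloc/emit/commit + poll pattern.
	 * All names here are stand-ins for demonstration.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct ring { int reserved; };

	static bool reset_in_progress;

	/* Stand-in for down_read_trylock(&adev->reset_domain->sem). */
	static bool reset_lock_tryacquire(void) { return !reset_in_progress; }
	static void reset_lock_release(void) { }

	static int ring_alloc(struct ring *r, int ndw) { r->reserved = ndw; return 0; }
	static void ring_undo(struct ring *r) { r->reserved = 0; }
	static void ring_commit(struct ring *r) { (void)r; }
	static int fence_emit(struct ring *r, unsigned *seq) { (void)r; *seq = 1; return 0; }
	/* Returns <1 on timeout, like amdgpu_fence_wait_polling(). */
	static int fence_wait(struct ring *r, unsigned seq) { (void)r; (void)seq; return 1; }

	static int flush_tlb(struct ring *r)
	{
		unsigned seq;
		int ret;

		/* A reset flushes all TLBs anyway; skip if one is ongoing. */
		if (!reset_lock_tryacquire())
			return 0;

		ret = ring_alloc(r, 2 + 8);	/* 2 dwords flush + 8 dwords fence */
		if (ret)
			goto out;

		/* ... emit invalidate packets here ... */

		ret = fence_emit(r, &seq);
		if (ret) {
			ring_undo(r);		/* roll back the uncommitted reservation */
			goto out;
		}
		ring_commit(r);

		if (fence_wait(r, seq) < 1)	/* poll; <1 means timeout */
			ret = -1;
	out:
		reset_lock_release();
		return ret;
	}

	int main(void)
	{
		struct ring r = { 0 };
		printf("flush_tlb() returned %d\n", flush_tlb(&r));
		return 0;
	}

The ordering matters: ring_undo() is only valid for a reservation that has not been committed, so once amdgpu_ring_commit() runs, the only remaining failure mode is the fence timeout.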
@@ -844,6 +848,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	case IP_VERSION(11, 0, 4):
 	case IP_VERSION(11, 5, 0):
 	case IP_VERSION(11, 5, 1):
+	case IP_VERSION(11, 5, 2):
 		/* Don't enable it by default yet.
 		 */
 		if (amdgpu_tmz < 1) {
@@ -876,11 +881,11 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
 	struct amdgpu_gmc *gmc = &adev->gmc;
 	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
 	bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
-				gc_ver == IP_VERSION(9, 3, 0) ||
 				gc_ver == IP_VERSION(9, 4, 0) ||
 				gc_ver == IP_VERSION(9, 4, 1) ||
 				gc_ver == IP_VERSION(9, 4, 2) ||
 				gc_ver == IP_VERSION(9, 4, 3) ||
+				gc_ver == IP_VERSION(9, 4, 4) ||
 				gc_ver >= IP_VERSION(10, 3, 0));
 
 	if (!amdgpu_sriov_xnack_support(adev))
@@ -1015,7 +1020,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 	flags |= AMDGPU_PTE_WRITEABLE;
 	flags |= AMDGPU_PTE_SNOOPED;
 	flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
-	flags |= AMDGPU_PDE_PTE;
+	flags |= AMDGPU_PDE_PTE_FLAG(adev);
 
 	/* The first n PDE0 entries are used as PTE,
 	 * pointing to vram
@@ -1028,7 +1033,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 	 * pointing to a 4K system page
 	 */
 	flags = AMDGPU_PTE_VALID;
-	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+	flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
 	/* Requires gart_ptb_gpu_pa to be 4K aligned */
 	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
 	drm_dev_exit(idx);
@@ -1147,8 +1152,6 @@ static ssize_t current_memory_partition_show(
 	default:
 		return sysfs_emit(buf, "UNKNOWN\n");
 	}
-
-	return sysfs_emit(buf, "UNKNOWN\n");
 }
 
 static DEVICE_ATTR_RO(current_memory_partition);
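The AMDGPU_PDE_PTE_FLAG()/AMDGPU_PDE_BFS_FLAG() changes in amdgpu_gmc_init_pdb0() replace fixed flag constants with per-device selectors, which suggests that newer GC IPs place these PDE control bits differently. A hedged sketch of how such a selector can be structured; the bit positions and the version threshold below are illustrative assumptions, not the real amdgpu definitions:

	/* Illustrative per-device flag selector; bit positions and the
	 * version cutoff are assumed values for demonstration only.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define PDE_PTE_LEGACY	(1ULL << 54)	/* assumed legacy bit position */
	#define PDE_PTE_NEWGC	(1ULL << 58)	/* assumed newer-IP bit position */

	struct dev_info { unsigned gc_major; };

	/* Pick the PDE-as-PTE bit based on the device's GC IP version. */
	static inline uint64_t pde_pte_flag(const struct dev_info *d)
	{
		return d->gc_major >= 12 ? PDE_PTE_NEWGC : PDE_PTE_LEGACY;
	}

	int main(void)
	{
		struct dev_info gfx11 = { 11 }, gfx12 = { 12 };
		printf("gfx11 flag: %#llx\n", (unsigned long long)pde_pte_flag(&gfx11));
		printf("gfx12 flag: %#llx\n", (unsigned long long)pde_pte_flag(&gfx12));
		return 0;
	}

Threading the device pointer through the macro keeps call sites like the PDB0 setup above generation-agnostic, instead of sprinkling IP-version checks at every use.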
@@ -1166,3 +1169,79 @@ void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
 {
 	device_remove_file(adev->dev, &dev_attr_current_memory_partition);
 }
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+				 struct amdgpu_mem_partition_info *mem_ranges,
+				 int exp_ranges)
+{
+	struct amdgpu_gmc_memrange *ranges;
+	int range_cnt, ret, i, j;
+	uint32_t nps_type;
+
+	if (!mem_ranges)
+		return -EINVAL;
+
+	ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+					    &range_cnt);
+
+	if (ret)
+		return ret;
+
+	/* TODO: For now, expect ranges and partition count to be the same.
+	 * Adjust if there are holes expected in any NPS domain.
+	 */
+	if (range_cnt != exp_ranges) {
+		dev_warn(
+			adev->dev,
+			"NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
+			exp_ranges, nps_type, range_cnt);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < exp_ranges; ++i) {
+		if (ranges[i].base_address >= ranges[i].limit_address) {
+			dev_warn(
+				adev->dev,
+				"Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
+				nps_type, i, ranges[i].base_address,
+				ranges[i].limit_address);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		/* Check for overlaps, not expecting any now */
+		for (j = i - 1; j >= 0; j--) {
+			if (max(ranges[j].base_address,
+				ranges[i].base_address) <=
+			    min(ranges[j].limit_address,
+				ranges[i].limit_address)) {
+				dev_warn(
+					adev->dev,
+					"overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
+					ranges[j].base_address,
+					ranges[j].limit_address,
+					ranges[i].base_address,
+					ranges[i].limit_address);
+				ret = -EINVAL;
+				goto err;
+			}
+		}
+
+		mem_ranges[i].range.fpfn =
+			(ranges[i].base_address -
+			 adev->vm_manager.vram_base_offset) >>
+			AMDGPU_GPU_PAGE_SHIFT;
+		mem_ranges[i].range.lpfn =
+			(ranges[i].limit_address -
+			 adev->vm_manager.vram_base_offset) >>
+			AMDGPU_GPU_PAGE_SHIFT;
+		mem_ranges[i].size =
+			ranges[i].limit_address - ranges[i].base_address + 1;
+	}
+
+err:
+	kfree(ranges);
+
+	return ret;
+}
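The overlap check in amdgpu_gmc_get_nps_memranges() relies on the standard identity for inclusive intervals: [b_i, l_i] and [b_j, l_j] intersect exactly when max(b_i, b_j) <= min(l_i, l_j). The fpfn/lpfn fields are then derived by rebasing each address against vram_base_offset and shifting down to page frames. A self-contained demonstration of both computations; the 4 KiB page shift and all values are assumptions for the example:

	/* Demo of the two computations in the loop above: inclusive-interval
	 * overlap and address-to-page-frame conversion. AMDGPU_GPU_PAGE_SHIFT
	 * is assumed to be 12 (4 KiB pages) here.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define GPU_PAGE_SHIFT 12

	struct memrange { uint64_t base, limit; };	/* inclusive limits */

	/* Intervals intersect iff max(bases) <= min(limits). */
	static bool ranges_overlap(struct memrange a, struct memrange b)
	{
		uint64_t lo = a.base > b.base ? a.base : b.base;
		uint64_t hi = a.limit < b.limit ? a.limit : b.limit;
		return lo <= hi;
	}

	int main(void)
	{
		struct memrange r0 = { 0x000000000ULL, 0x0ffffffffULL };	/* first 4 GiB */
		struct memrange r1 = { 0x100000000ULL, 0x1ffffffffULL };	/* next 4 GiB */
		uint64_t vram_base_offset = 0;	/* assumed zero for the demo */

		printf("overlap: %d\n", ranges_overlap(r0, r1));	/* prints 0 */

		/* fpfn/lpfn as in the patch: rebase, then shift to page frames. */
		printf("fpfn: %llu lpfn: %llu size: %llu\n",
		       (unsigned long long)((r1.base - vram_base_offset) >> GPU_PAGE_SHIFT),
		       (unsigned long long)((r1.limit - vram_base_offset) >> GPU_PAGE_SHIFT),
		       (unsigned long long)(r1.limit - r1.base + 1));
		return 0;
	}

Because each range is already validated (base strictly below limit) before the overlap scan, the size computation limit - base + 1 cannot underflow.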