aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2022-02-14 10:31:51 +1000
committerDave Airlie <airlied@redhat.com>2022-02-14 10:31:51 +1000
commit123db17ddff007080d464e785689fb14f94cbc7a (patch)
tree11da22fd6a508e496be838e43e0b504266c4a4d3 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parente7a09cea6483b44ea0c82f07145fcbd8a918bf96 (diff)
parent7f161df1a513e2961f4e3c96a8355c8ce93ad175 (diff)
Merge tag 'amd-drm-next-5.18-2022-02-11-1' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.18-2022-02-11-1: amdgpu: - Clean up of power management code - Enable freesync video mode by default - Clean up of RAS code - Improve VRAM access for debug using SDMA - Coding style cleanups - SR-IOV fixes - More display FP reorg - TLB flush fixes for Arcuturus, Vega20 - Misc display fixes - Rework special register access methods for SR-IOV - DP2 fixes - DP tunneling fixes - DSC fixes - More IP discovery cleanups - Misc RAS fixes - Enable both SMU i2c buses where applicable - s2idle improvements - DPCS header cleanup - Add new CAP firmware support for SR-IOV amdkfd: - Misc cleanups - SVM fixes - CRIU support - Clean up MQD manager UAPI: - Add interface to amdgpu CTX ioctl to request a stable power state for profiling https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 - Add amdkfd support for CRIU https://github.com/checkpoint-restore/criu/pull/1709 - Remove old unused amdkfd debugger interface Was only implemented for Kaveri and was only ever used by an old HSA tool that was never open sourced radeon: - Fix error handling in radeon_driver_open_kms - UVD suspend fix - Misc fixes From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220211220706.5803-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c74
1 files changed, 63 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f9bab963a948..2e00c3fb4bd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -778,7 +778,7 @@ unwind:
continue;
if (attachment[i]->bo_va) {
amdgpu_bo_reserve(bo[i], true);
- amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
@@ -795,7 +795,7 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
- amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va);
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
@@ -842,7 +842,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -864,6 +865,18 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
goto out;
}
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * will be validated in the restore_userptr_work worker at a
+ * later stage when it is scheduled by another ioctl called by
+ * CRIU master process for the target pid for restore.
+ */
+ atomic_inc(&mem->invalid);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
+
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
@@ -1452,10 +1465,39 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
return avm->pd_phys_addr;
}
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+ mutex_unlock(&pinfo->lock);
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ pr_debug("scheduling work\n");
+ atomic_inc(&pinfo->evicted_bos);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags)
+ uint64_t *offset, uint32_t flags, bool criu_resume)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
enum ttm_bo_type bo_type = ttm_bo_type_device;
@@ -1558,7 +1600,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, user_addr);
+ pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
if (ret)
goto allocate_init_user_pages_failed;
} else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
@@ -1813,12 +1856,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
true);
ret = unreserve_bo_and_vms(&ctx, false, false);
- /* Only apply no TLB flush on Aldebaran to
- * workaround regressions on other Asics.
- */
- if (table_freed && (adev->asic_type != CHIP_ALDEBARAN))
- *table_freed = true;
-
goto out;
out_unreserve:
@@ -2068,6 +2105,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
int evicted_bos;
int r = 0;
+ /* Do not process MMU notifications until stage-4 IOCTL is received */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
atomic_inc(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
@@ -2635,3 +2676,14 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
return 0;
}
+
+bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->is_mapped && entry->adev == adev)
+ return true;
+ }
+ return false;
+}