-rw-r--r--   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |   2
-rw-r--r--   drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1
-rw-r--r--   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 135
-rw-r--r--   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |   2
4 files changed, 140 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6cb0c4168fa0..81f71c4079a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@ struct svm_range_list {
 	struct work_struct		deferred_list_work;
 	struct list_head		deferred_range_list;
 	spinlock_t			deferred_list_lock;
+	atomic_t			evicted_ranges;
+	struct delayed_work		restore_work;
 };

 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5f1ec7553509..3c72e9dc6422 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	cancel_delayed_work_sync(&p->eviction_work);
 	cancel_delayed_work_sync(&p->restore_work);
+	cancel_delayed_work_sync(&p->svms.restore_work);

 	mutex_lock(&p->mutex);
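The two hunks above add a per-process delayed work item plus an eviction counter, and make sure the work is cancelled synchronously before the process notifier releases the mm. Below is a minimal sketch of that delayed-work lifecycle, using hypothetical names (demo_restorer, demo_restore_fn, demo_init, demo_teardown); it only illustrates the workqueue API pattern the patch relies on, not the driver's actual code.

#include <linux/atomic.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_restorer {
	atomic_t evicted_ranges;		/* outstanding evictions */
	struct delayed_work restore_work;	/* deferred restore attempt */
};

static void demo_restore_fn(struct work_struct *work)
{
	struct demo_restorer *r = container_of(to_delayed_work(work),
					       struct demo_restorer,
					       restore_work);

	/* If anything is still evicted, try again after a short delay. */
	if (atomic_read(&r->evicted_ranges))
		schedule_delayed_work(&r->restore_work, msecs_to_jiffies(1));
}

static void demo_init(struct demo_restorer *r)
{
	atomic_set(&r->evicted_ranges, 0);
	INIT_DELAYED_WORK(&r->restore_work, demo_restore_fn);
}

static void demo_teardown(struct demo_restorer *r)
{
	/* Like kfd_process_notifier_release: wait for any in-flight work. */
	cancel_delayed_work_sync(&r->restore_work);
}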
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3ccb75d45f13..1fe6913242d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -22,6 +22,7 @@
  */

 #include <linux/types.h>
+#include <linux/sched/task.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -29,6 +30,8 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"

+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 				    const struct mmu_notifier_range *range,
@@ -251,6 +254,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->insert_list);
 	INIT_LIST_HEAD(&prange->deferred_list);
 	INIT_LIST_HEAD(&prange->child_list);
+	atomic_set(&prange->invalid, 0);
 	mutex_init(&prange->lock);
 	svm_range_set_default_attributes(&prange->preferred_loc,
 					 &prange->prefetch_loc,
@@ -963,6 +967,129 @@ retry_flush_work:
 		goto retry_flush_work;
 }

+static void svm_range_restore_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	int evicted_ranges;
+	int invalid;
+	int r;
+
+	svms = container_of(dwork, struct svm_range_list, restore_work);
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+	if (!evicted_ranges)
+		return;
+
+	pr_debug("restore svm ranges\n");
+
+	/* kfd_process_notifier_release destroys this worker thread. So during
+	 * the lifetime of this thread, kfd_process and mm will be valid.
+	 */
+	p = container_of(svms, struct kfd_process, svms);
+	process_info = p->kgd_process_info;
+	mm = p->mm;
+	if (!mm)
+		return;
+
+	mutex_lock(&process_info->lock);
+	svm_range_list_lock_and_flush_work(svms, mm);
+	mutex_lock(&svms->lock);
+
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+	list_for_each_entry(prange, &svms->list, list) {
+		invalid = atomic_read(&prange->invalid);
+		if (!invalid)
+			continue;
+
+		pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+			 prange->svms, prange, prange->start, prange->last,
+			 invalid);
+
+		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+					       false, true);
+		if (r) {
+			pr_debug("failed %d to map 0x%lx to gpus\n", r,
+				 prange->start);
+			goto unlock_out;
+		}
+
+		if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+			goto unlock_out;
+	}
+
+	if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+	    evicted_ranges)
+		goto unlock_out;
+
+	evicted_ranges = 0;
+
+	r = kgd2kfd_resume_mm(mm);
+	if (r) {
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+		pr_debug("failed %d to resume KFD\n", r);
+	}
+
+	pr_debug("restore svm ranges successfully\n");
+
+unlock_out:
+	mutex_unlock(&svms->lock);
+	mmap_write_unlock(mm);
+	mutex_unlock(&process_info->lock);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_ranges) {
+		pr_debug("reschedule to restore svm range\n");
+		schedule_delayed_work(&svms->restore_work,
+			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+	}
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Stop all queues of the process to ensure the GPU doesn't access the memory,
+ * then return to let the CPU evict the buffer and proceed with the CPU page
+ * table update.
+ *
+ * No lock is needed to sync CPU page table invalidation with GPU execution.
+ * If an invalidation happens while the restore work is running, the restore
+ * work restarts to pick up the latest CPU page mapping for the GPU, then
+ * starts the queues.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+		unsigned long start, unsigned long last)
+{
+	struct svm_range_list *svms = prange->svms;
+	int evicted_ranges;
+	int r = 0;
+
+	atomic_inc(&prange->invalid);
+	evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+	if (evicted_ranges != 1)
+		return r;
+
+	pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+		 prange->svms, prange->start, prange->last);
+
+	/* First eviction, stop the queues */
+	r = kgd2kfd_quiesce_mm(mm);
+	if (r)
+		pr_debug("failed to quiesce KFD\n");
+
+	pr_debug("schedule to restore svm %p ranges\n", svms);
+	schedule_delayed_work(&svms->restore_work,
+		msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+	return r;
+}
+
 static struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
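The subtle part of the hunk above is the handoff between the notifier path and the restore worker: every invalidation bumps the counters, only the first eviction quiesces the queues, and the worker resumes them only if no new eviction slipped in between its read of evicted_ranges and the atomic_cmpxchg back to zero. A condensed sketch of that handoff, continuing the hypothetical demo_restorer example above (demo_quiesce, demo_resume and demo_revalidate are stand-ins for kgd2kfd_quiesce_mm, kgd2kfd_resume_mm and svm_range_validate_and_map, stubbed out so the sketch compiles), might look like this:

/* Hypothetical placeholders, not the driver's functions. */
static void demo_quiesce(struct demo_restorer *r)    { }
static void demo_resume(struct demo_restorer *r)     { }
static void demo_revalidate(struct demo_restorer *r) { }

/* Called from the MMU notifier path for every CPU page table invalidation. */
static int demo_evict(struct demo_restorer *r)
{
	/* Only the 0 -> 1 transition needs to stop the queues. */
	if (atomic_inc_return(&r->evicted_ranges) != 1)
		return 0;

	demo_quiesce(r);			/* first eviction: stop queues */
	schedule_delayed_work(&r->restore_work, msecs_to_jiffies(1));
	return 0;
}

/* Body of the restore worker, locking and error handling omitted. */
static void demo_restore(struct demo_restorer *r)
{
	int evicted = atomic_read(&r->evicted_ranges);

	if (!evicted)
		return;

	demo_revalidate(r);			/* rebuild GPU mappings */

	/*
	 * Resume the queues only if no further eviction raced in; otherwise
	 * a newer invalidation is pending and the rescheduled work retries.
	 */
	if (atomic_cmpxchg(&r->evicted_ranges, evicted, 0) == evicted)
		demo_resume(r);
	else
		schedule_delayed_work(&r->restore_work, msecs_to_jiffies(1));
}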
@@ -1331,6 +1458,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
  * svm_range_cpu_invalidate_pagetables - interval notifier callback
  *
  * MMU range unmap notifier to remove svm ranges
+ *
+ * If GPU vm fault retry is not enabled, evict the svm range; the restore
+ * work will then update the GPU mapping.
+ * If GPU vm fault retry is enabled, unmap the svm range from the GPU; the
+ * vm fault handler will update the GPU mapping.
  */
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -1364,6 +1496,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
 		break;
 	default:
+		svm_range_evict(prange, mni->mm, start, last);
 		break;
 	}
@@ -1389,6 +1522,8 @@ int svm_range_list_init(struct kfd_process *p)
 	svms->objects = RB_ROOT_CACHED;
 	mutex_init(&svms->lock);
 	INIT_LIST_HEAD(&svms->list);
+	atomic_set(&svms->evicted_ranges, 0);
+	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
 	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
 	INIT_LIST_HEAD(&svms->deferred_range_list);
 	spin_lock_init(&svms->deferred_list_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 5949890bf48c..3c94899c5c40 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -67,6 +67,7 @@ struct svm_work_list_item {
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
+ * @invalid: not 0 means cpu page table is invalidated
  * @notifier: register mmu interval notifier
  * @work_item: deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -97,6 +98,7 @@ struct svm_range {
 	uint32_t			prefetch_loc;
 	uint32_t			actual_loc;
 	uint8_t				granularity;
+	atomic_t			invalid;
 	struct mmu_interval_notifier	notifier;
 	struct svm_work_list_item	work_item;
 	struct list_head		deferred_list;
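The per-range invalid counter added in kfd_svm.h plays the same role at range granularity: the notifier path bumps it, and the restore worker only clears it if the value it validated against is still current, so an invalidation that lands mid-restore keeps the range marked for the next pass. A small, self-contained sketch of that per-range check, again with hypothetical names (demo_range, demo_map_range, demo_restore_one), not the driver's own loop:

#include <linux/atomic.h>
#include <linux/errno.h>

struct demo_range {
	atomic_t invalid;	/* bumped by the notifier, cleared by restore */
};

/* Hypothetical stand-in for svm_range_validate_and_map(). */
static int demo_map_range(struct demo_range *range)
{
	return 0;
}

/* One iteration of a restore worker's range loop. */
static int demo_restore_one(struct demo_range *range)
{
	int invalid = atomic_read(&range->invalid);

	if (!invalid)
		return 0;		/* nothing to do for this range */

	if (demo_map_range(range))
		return -EAGAIN;		/* validation failed, retry later */

	/*
	 * Clear the flag only if no new invalidation arrived while we were
	 * mapping; otherwise leave it set so the next pass revalidates.
	 */
	if (atomic_cmpxchg(&range->invalid, invalid, 0) != invalid)
		return -EAGAIN;

	return 0;
}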