-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 291
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  18
3 files changed, 311 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index cfdd34c2223c..6cb0c4168fa0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -735,6 +735,9 @@ struct svm_range_list {
 	struct mutex lock;
 	struct rb_root_cached objects;
 	struct list_head list;
+	struct work_struct deferred_list_work;
+	struct list_head deferred_range_list;
+	spinlock_t deferred_list_lock;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2f92b92739bc..72d54cf4acc8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -137,6 +137,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->update_list);
 	INIT_LIST_HEAD(&prange->remove_list);
 	INIT_LIST_HEAD(&prange->insert_list);
+	INIT_LIST_HEAD(&prange->deferred_list);
+	INIT_LIST_HEAD(&prange->child_list);
 	mutex_init(&prange->lock);
 	svm_range_set_default_attributes(&prange->preferred_loc,
 					 &prange->prefetch_loc,
@@ -411,6 +413,18 @@ svm_range_split_head(struct svm_range *prange, struct svm_range *new,
 	return r;
 }
 
+static void
+svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+		    struct svm_range *pchild, enum svm_work_list_ops op)
+{
+	pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+		 pchild, pchild->start, pchild->last, prange, op);
+
+	pchild->work_item.mm = mm;
+	pchild->work_item.op = op;
+	list_add_tail(&pchild->child_list, &prange->child_list);
+}
+
 /*
  * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
  *
@@ -470,6 +484,30 @@ unreserve_out:
 	return r;
 }
 
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+				   struct mm_struct *mm)
+{
+retry_flush_work:
+	flush_work(&svms->deferred_list_work);
+	mmap_write_lock(mm);
+
+	if (list_empty(&svms->deferred_range_list))
+		return;
+	mmap_write_unlock(mm);
+	pr_debug("retry flush\n");
+	goto retry_flush_work;
+}
+
 static struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
@@ -610,15 +648,260 @@ out:
 	return r;
 }
 
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+					    struct svm_range *prange)
+{
+	unsigned long start;
+	unsigned long last;
+
+	start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+	last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+	if (prange->start == start && prange->last == last)
+		return;
+
+	pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+		 prange->svms, prange, start, last, prange->start,
+		 prange->last);
+
+	if (start != 0 && last != 0) {
+		interval_tree_remove(&prange->it_node, &prange->svms->objects);
+		svm_range_remove_notifier(prange);
+	}
+	prange->it_node.start = prange->start;
+	prange->it_node.last = prange->last;
+
+	interval_tree_insert(&prange->it_node, &prange->svms->objects);
+	svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+	struct mm_struct *mm = prange->work_item.mm;
+
+	switch (prange->work_item.op) {
+	case SVM_OP_NULL:
+		pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 svms, prange, prange->start, prange->last);
+		break;
+	case SVM_OP_UNMAP_RANGE:
pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + break; + case SVM_OP_UPDATE_RANGE_NOTIFIER: + pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_update_notifier_and_interval_tree(mm, prange); + break; + case SVM_OP_ADD_RANGE: + pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, + prange->start, prange->last); + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + break; + default: + WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, + prange->work_item.op); + } +} + +static void svm_range_deferred_list_work(struct work_struct *work) +{ + struct svm_range_list *svms; + struct svm_range *prange; + struct mm_struct *mm; + + svms = container_of(work, struct svm_range_list, deferred_list_work); + pr_debug("enter svms 0x%p\n", svms); + + spin_lock(&svms->deferred_list_lock); + while (!list_empty(&svms->deferred_range_list)) { + prange = list_first_entry(&svms->deferred_range_list, + struct svm_range, deferred_list); + spin_unlock(&svms->deferred_list_lock); + pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, + prange->start, prange->last, prange->work_item.op); + + mm = prange->work_item.mm; + mmap_write_lock(mm); + mutex_lock(&svms->lock); + + /* Remove from deferred_list must be inside mmap write lock, + * otherwise, svm_range_list_lock_and_flush_work may hold mmap + * write lock, and continue because deferred_list is empty, then + * deferred_list handle is blocked by mmap write lock. + */ + spin_lock(&svms->deferred_list_lock); + list_del_init(&prange->deferred_list); + spin_unlock(&svms->deferred_list_lock); + + while (!list_empty(&prange->child_list)) { + struct svm_range *pchild; + + pchild = list_first_entry(&prange->child_list, + struct svm_range, child_list); + pr_debug("child prange 0x%p op %d\n", pchild, + pchild->work_item.op); + list_del_init(&pchild->child_list); + svm_range_handle_list_op(svms, pchild); + } + + svm_range_handle_list_op(svms, prange); + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + + spin_lock(&svms->deferred_list_lock); + } + spin_unlock(&svms->deferred_list_lock); + + pr_debug("exit svms 0x%p\n", svms); +} + +static void +svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, + struct mm_struct *mm, enum svm_work_list_ops op) +{ + spin_lock(&svms->deferred_list_lock); + /* if prange is on the deferred list */ + if (!list_empty(&prange->deferred_list)) { + pr_debug("update exist prange 0x%p work op %d\n", prange, op); + WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n"); + if (op != SVM_OP_NULL && + prange->work_item.op != SVM_OP_UNMAP_RANGE) + prange->work_item.op = op; + } else { + prange->work_item.op = op; + prange->work_item.mm = mm; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } + spin_unlock(&svms->deferred_list_lock); +} + +static void schedule_deferred_list_work(struct svm_range_list *svms) +{ + spin_lock(&svms->deferred_list_lock); + if (!list_empty(&svms->deferred_range_list)) + schedule_work(&svms->deferred_list_work); + spin_unlock(&svms->deferred_list_lock); +} + +static void +svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, + struct svm_range *prange, unsigned long start, + unsigned long last) +{ + 
+	struct svm_range *head;
+	struct svm_range *tail;
+
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
+			 prange->start, prange->last);
+		return;
+	}
+	if (start > prange->last || last < prange->start)
+		return;
+
+	head = tail = prange;
+	if (start > prange->start)
+		svm_range_split(prange, prange->start, start - 1, &tail);
+	if (last < tail->last)
+		svm_range_split(tail, last + 1, tail->last, &head);
+
+	if (head != prange && tail != prange) {
+		svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+	} else if (tail != prange) {
+		svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+	} else if (head != prange) {
+		svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+	} else if (parent != prange) {
+		prange->work_item.op = SVM_OP_UNMAP_RANGE;
+	}
+}
+
+static void
+svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
+			 unsigned long start, unsigned long last)
+{
+	struct svm_range_list *svms;
+	struct svm_range *pchild;
+	struct kfd_process *p;
+	bool unmap_parent;
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return;
+	svms = &p->svms;
+
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
+		 prange, prange->start, prange->last, start, last);
+
+	unmap_parent = start <= prange->start && last >= prange->last;
+
+	list_for_each_entry(pchild, &prange->child_list, child_list)
+		svm_range_unmap_split(mm, prange, pchild, start, last);
+	svm_range_unmap_split(mm, prange, prange, start, last);
+
+	if (unmap_parent)
+		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
+	else
+		svm_range_add_list_work(svms, prange, mm,
+					SVM_OP_UPDATE_RANGE_NOTIFIER);
+	schedule_deferred_list_work(svms);
+
+	kfd_unref_process(p);
+}
+
 /**
  * svm_range_cpu_invalidate_pagetables - interval notifier callback
  *
+ * MMU range unmap notifier to remove svm ranges
  */
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 				    const struct mmu_notifier_range *range,
 				    unsigned long cur_seq)
 {
+	struct svm_range *prange;
+	unsigned long start;
+	unsigned long last;
+
+	if (range->event == MMU_NOTIFY_RELEASE)
+		return true;
+
+	start = mni->interval_tree.start;
+	last = mni->interval_tree.last;
+	start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
+	last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+	pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
+		 start, last, range->start >> PAGE_SHIFT,
+		 (range->end - 1) >> PAGE_SHIFT,
+		 mni->interval_tree.start >> PAGE_SHIFT,
+		 mni->interval_tree.last >> PAGE_SHIFT, range->event);
+
+	prange = container_of(mni, struct svm_range, notifier);
+
+	svm_range_lock(prange);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	switch (range->event) {
+	case MMU_NOTIFY_UNMAP:
+		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
+		break;
+	default:
+		break;
+	}
+
+	svm_range_unlock(prange);
+
 	return true;
 }
 
@@ -627,6 +910,9 @@ void svm_range_list_fini(struct kfd_process *p)
 	mutex_destroy(&p->svms.lock);
 
 	pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+
+	/* Ensure list work is finished before process is destroyed */
+	flush_work(&p->svms.deferred_list_work);
 }
 
 int svm_range_list_init(struct kfd_process *p)
@@ -636,6 +922,9 @@ int svm_range_list_init(struct kfd_process *p)
 	svms->objects = RB_ROOT_CACHED;
 	mutex_init(&svms->lock);
 	INIT_LIST_HEAD(&svms->list);
+	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
+	INIT_LIST_HEAD(&svms->deferred_range_list);
+	spin_lock_init(&svms->deferred_list_lock);
 
 	return 0;
 }
@@ -753,7 +1042,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 
 	mutex_lock(&process_info->lock);
 
-	mmap_write_lock(mm);
+	svm_range_list_lock_and_flush_work(svms, mm);
 
 	if (!svm_range_is_valid(mm, start, size)) {
 		pr_debug("invalid range\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 10c73348c2c0..173e93e138a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -33,6 +33,18 @@
 #include "amdgpu.h"
 #include "kfd_priv.h"
 
+enum svm_work_list_ops {
+	SVM_OP_NULL,
+	SVM_OP_UNMAP_RANGE,
+	SVM_OP_UPDATE_RANGE_NOTIFIER,
+	SVM_OP_ADD_RANGE
+};
+
+struct svm_work_list_item {
+	enum svm_work_list_ops op;
+	struct mm_struct *mm;
+};
+
 /**
  * struct svm_range - shared virtual memory range
  *
@@ -54,6 +66,9 @@
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
  * @notifier: register mmu interval notifier
+ * @work_item: deferred work item information
+ * @deferred_list: list header used to add range to deferred list
+ * @child_list: list header for split ranges which are not added to svms yet
  * @bitmap_access: index bitmap of GPUs which can access the range
 * @bitmap_aip: index bitmap of GPUs which can access the range in place
  *
@@ -79,6 +94,9 @@ struct svm_range {
 	uint32_t actual_loc;
 	uint8_t granularity;
 	struct mmu_interval_notifier notifier;
+	struct svm_work_list_item work_item;
+	struct list_head deferred_list;
+	struct list_head child_list;
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 };
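
For reference, a minimal caller sketch (not part of the patch) of how the svm_range_set_attr() hunk above is expected to use svm_range_list_lock_and_flush_work(): flush the deferred work and take the mmap write lock before manipulating the range list. kfd_example_update_ranges() and do_range_update() are hypothetical names used only for illustration; the snippet assumes kernel context and is not buildable on its own.

/*
 * Hypothetical caller sketch (editorial, assumes kernel context): mirror the
 * svm_range_set_attr() change, which replaces a bare mmap_write_lock() with
 * svm_range_list_lock_and_flush_work().
 */
static int kfd_example_update_ranges(struct kfd_process *p,
				     struct mm_struct *mm)
{
	struct svm_range_list *svms = &p->svms;
	int r;

	/* Returns with the mmap write lock held and deferred_range_list
	 * drained, so ranges cannot be unlinked or re-split underneath us.
	 */
	svm_range_list_lock_and_flush_work(svms, mm);
	mutex_lock(&svms->lock);

	r = do_range_update(svms);	/* hypothetical range manipulation */

	mutex_unlock(&svms->lock);
	mmap_write_unlock(mm);
	return r;
}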
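
A worked example (editorial, not from the patch) of the svm_range_unmap_split() case analysis, assuming a parent range spanning pages [0x100 0x1ff]; the page numbers are illustrative only.

/*
 * munmap of [0x140 0x17f] (middle):
 *   prange is truncated in place to [0x100 0x13f];
 *   head = [0x140 0x17f] is queued on the child list as SVM_OP_UNMAP_RANGE;
 *   tail = [0x180 0x1ff] is queued as SVM_OP_ADD_RANGE so the deferred worker
 *   inserts it into svms->objects and registers its notifier.
 *
 * munmap of [0x100 0x13f] (head of the range):
 *   prange shrinks to [0x140 0x1ff];
 *   head = [0x100 0x13f] is queued as SVM_OP_UNMAP_RANGE.
 *
 * munmap of [0x180 0x1ff] (tail of the range):
 *   prange shrinks to [0x100 0x17f];
 *   tail = [0x180 0x1ff] is queued as SVM_OP_UNMAP_RANGE.
 *
 * The parent itself is then queued by svm_range_unmap_from_cpu() with
 * SVM_OP_UPDATE_RANGE_NOTIFIER for a partial unmap, or SVM_OP_UNMAP_RANGE
 * when the whole range was unmapped.
 */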