Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 217
1 file changed, 156 insertions(+), 61 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7b332246eda3..11074cc8c333 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
 
 #ifdef dev_fmt
 #undef dev_fmt
@@ -43,7 +44,13 @@
 /* Long enough to ensure no retry fault comes after svm range is restored and
  * page table is updated.
  */
-#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	2000
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)
+
+/* Giant svm range split into smaller ranges based on this, it is decided using
+ * minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
+ * power of 2MB.
+ */
+static uint64_t max_svm_range_pages;
 
 struct criu_svm_metadata {
 	struct list_head list;
@@ -259,13 +266,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
 	}
 }
 
-static void svm_range_free(struct svm_range *prange)
+static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
 {
+	uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 		 prange->start, prange->last);
 
 	svm_range_vram_node_free(prange);
 	svm_range_free_dma_mappings(prange);
+
+	if (update_mem_usage && !p->xnack_enabled) {
+		pr_debug("unreserve mem limit: %lld\n", size);
+		amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+	}
 	mutex_destroy(&prange->lock);
 	mutex_destroy(&prange->migrate_mutex);
 	kfree(prange);
@@ -284,7 +300,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
 
 static struct
 svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
-			 uint64_t last)
+			 uint64_t last, bool update_mem_usage)
 {
 	uint64_t size = last - start + 1;
 	struct svm_range *prange;
@@ -293,6 +309,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
 	if (!prange)
 		return NULL;
+
+	p = container_of(svms, struct kfd_process, svms);
+	if (!p->xnack_enabled && update_mem_usage &&
+	    amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+					    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+		pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+		kfree(prange);
+		return NULL;
+	}
 	prange->npages = size;
 	prange->svms = svms;
 	prange->start = start;
@@ -307,7 +332,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
-	p = container_of(svms, struct kfd_process, svms);
 	if (p->xnack_enabled)
 		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
 			    MAX_GPU_INSTANCE);
@@ -517,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 		kfree(svm_bo);
 		return -ESRCH;
 	}
-	svm_bo->svms = prange->svms;
 	svm_bo->eviction_fence =
 		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
 					   mm,
@@ -1000,9 +1023,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
 	svms = prange->svms;
 
 	if (old_start == start)
-		*new = svm_range_new(svms, last + 1, old_last);
+		*new = svm_range_new(svms, last + 1, old_last, false);
 	else
-		*new = svm_range_new(svms, old_start, start - 1);
+		*new = svm_range_new(svms, old_start, start - 1, false);
 	if (!*new)
 		return -ENOMEM;
 
@@ -1010,7 +1033,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
 	if (r) {
 		pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", r,
 			 old_start, old_last, start, last);
-		svm_range_free(*new);
+		svm_range_free(*new, false);
 		*new = NULL;
 	}
 
@@ -1199,7 +1222,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 static int
 svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
-			  unsigned long last)
+			  unsigned long last, uint32_t trigger)
 {
 	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
 	struct kfd_process_device *pdd;
@@ -1231,6 +1254,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 			return -EINVAL;
 		}
 
+		kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+					     start, last, trigger);
+
 		r = svm_range_unmap_from_gpu(pdd->dev->adev,
 					     drm_priv_to_vm(pdd->drm_priv),
 					     start, last, &fence);
@@ -1617,7 +1643,7 @@ unreserve_out:
 	svm_range_unreserve_bos(&ctx);
 
 	if (!r)
-		prange->validate_timestamp = ktime_to_us(ktime_get());
+		prange->validate_timestamp = ktime_get_boottime();
 
 	return r;
 }
@@ -1729,14 +1755,16 @@ out_reschedule:
 	mutex_unlock(&svms->lock);
 	mmap_write_unlock(mm);
 	mutex_unlock(&process_info->lock);
-	mmput(mm);
 
 	/* If validation failed, reschedule another attempt */
 	if (evicted_ranges) {
 		pr_debug("reschedule to restore svm range\n");
 		schedule_delayed_work(&svms->restore_work,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+		kfd_smi_event_queue_restore_rescheduled(mm);
 	}
+	mmput(mm);
 }
 
 /**
@@ -1756,7 +1784,8 @@ out_reschedule:
  */
 static int
 svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
-		unsigned long start, unsigned long last)
+		unsigned long start, unsigned long last,
+		enum mmu_notifier_event event)
 {
 	struct svm_range_list *svms = prange->svms;
 	struct svm_range *pchild;
@@ -1768,10 +1797,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 	pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
 		 svms, prange->start, prange->last, start, last);
 
-	if (!p->xnack_enabled) {
+	if (!p->xnack_enabled ||
+	    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
 		int evicted_ranges;
+		bool mapped = prange->mapped_to_gpu;
 
 		list_for_each_entry(pchild, &prange->child_list, child_list) {
+			if (!pchild->mapped_to_gpu)
+				continue;
+			mapped = true;
 			mutex_lock_nested(&pchild->lock, 1);
 			if (pchild->start <= last && pchild->last >= start) {
 				pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1781,6 +1815,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			mutex_unlock(&pchild->lock);
 		}
 
+		if (!mapped)
+			return r;
+
 		if (prange->start <= last && prange->last >= start)
 			atomic_inc(&prange->invalid);
 
@@ -1792,7 +1829,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			 prange->svms, prange->start, prange->last);
 
 		/* First eviction, stop the queues */
-		r = kgd2kfd_quiesce_mm(mm);
+		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
 		if (r)
 			pr_debug("failed to quiesce KFD\n");
 
@@ -1801,6 +1838,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 	} else {
 		unsigned long s, l;
+		uint32_t trigger;
+
+		if (event == MMU_NOTIFY_MIGRATE)
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+		else
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
 
 		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 			 prange->svms, start, last);
@@ -1809,13 +1852,13 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			s = max(start, pchild->start);
 			l = min(last, pchild->last);
 			if (l >= s)
-				svm_range_unmap_from_gpus(pchild, s, l);
+				svm_range_unmap_from_gpus(pchild, s, l, trigger);
 			mutex_unlock(&pchild->lock);
 		}
 		s = max(start, prange->start);
 		l = min(last, prange->last);
 		if (l >= s)
-			svm_range_unmap_from_gpus(prange, s, l);
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
 	}
 
 	return r;
@@ -1825,7 +1868,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
 
-	new = svm_range_new(old->svms, old->start, old->last);
+	new = svm_range_new(old->svms, old->start, old->last, false);
 	if (!new)
 		return NULL;
 
@@ -1849,6 +1892,46 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	return new;
 }
 
+void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+	uint64_t max_pages;
+	uint64_t pages, _pages;
+
+	/* 1/32 VRAM size in pages */
+	pages = adev->gmc.real_vram_size >> 17;
+	pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+	pages = rounddown_pow_of_two(pages);
+	do {
+		max_pages = READ_ONCE(max_svm_range_pages);
+		_pages = min_not_zero(max_pages, pages);
+	} while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
+}
+
+static int
+svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
+		    uint64_t max_pages, struct list_head *insert_list,
+		    struct list_head *update_list)
+{
+	struct svm_range *prange;
+	uint64_t l;
+
+	pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
+		 max_pages, start, last);
+
+	while (last >= start) {
+		l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
+
+		prange = svm_range_new(svms, start, l, true);
+		if (!prange)
+			return -ENOMEM;
+		list_add(&prange->list, insert_list);
+		list_add(&prange->update_list, update_list);
+
+		start = l + 1;
+	}
+	return 0;
+}
+
 /**
  * svm_range_add - add svm range and handle overlap
  * @p: the range add to this process svms
@@ -1889,6 +1972,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 	struct interval_tree_node *node;
 	struct svm_range *prange;
 	struct svm_range *tmp;
+	struct list_head new_list;
 	int r = 0;
 
 	pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
@@ -1896,6 +1980,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 	INIT_LIST_HEAD(update_list);
 	INIT_LIST_HEAD(insert_list);
 	INIT_LIST_HEAD(remove_list);
+	INIT_LIST_HEAD(&new_list);
 
 	node = interval_tree_iter_first(&svms->objects, start, last);
 	while (node) {
@@ -1951,14 +2036,11 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 
 		/* insert a new node if needed */
 		if (node->start > start) {
-			prange = svm_range_new(svms, start, node->start - 1);
-			if (!prange) {
-				r = -ENOMEM;
+			r = svm_range_split_new(svms, start, node->start - 1,
+						READ_ONCE(max_svm_range_pages),
+						&new_list, update_list);
+			if (r)
 				goto out;
-			}
-
-			list_add(&prange->list, insert_list);
-			list_add(&prange->update_list, update_list);
 		}
 
 		node = next;
@@ -1966,20 +2048,20 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 	}
 
 	/* add a final range at the end if needed */
-	if (start <= last) {
-		prange = svm_range_new(svms, start, last);
-		if (!prange) {
-			r = -ENOMEM;
-			goto out;
-		}
-		list_add(&prange->list, insert_list);
-		list_add(&prange->update_list, update_list);
-	}
+	if (start <= last)
+		r = svm_range_split_new(svms, start, last,
+					READ_ONCE(max_svm_range_pages),
+					&new_list, update_list);
 
 out:
-	if (r)
+	if (r) {
 		list_for_each_entry_safe(prange, tmp, insert_list, list)
-			svm_range_free(prange);
+			svm_range_free(prange, false);
+		list_for_each_entry_safe(prange, tmp, &new_list, list)
+			svm_range_free(prange, true);
+	} else {
+		list_splice(&new_list, insert_list);
+	}
 
 	return r;
 }
@@ -2026,7 +2108,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
 			 svms, prange, prange->start, prange->last);
 		svm_range_unlink(prange);
 		svm_range_remove_notifier(prange);
-		svm_range_free(prange);
+		svm_range_free(prange, true);
 		break;
 	case SVM_OP_UPDATE_RANGE_NOTIFIER:
 		pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2229,6 +2311,7 @@ static void
 svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 			 unsigned long start, unsigned long last)
 {
+	uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
 	struct svm_range_list *svms;
 	struct svm_range *pchild;
 	struct kfd_process *p;
@@ -2256,14 +2339,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 		s = max(start, pchild->start);
 		l = min(last, pchild->last);
 		if (l >= s)
-			svm_range_unmap_from_gpus(pchild, s, l);
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
 		mutex_unlock(&pchild->lock);
 	}
 	s = max(start, prange->start);
 	l = min(last, prange->last);
 	if (l >= s)
-		svm_range_unmap_from_gpus(prange, s, l);
+		svm_range_unmap_from_gpus(prange, s, l, trigger);
 	svm_range_unmap_split(mm, prange, prange, start, last);
 
 	if (unmap_parent)
@@ -2330,7 +2413,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
 		break;
 	default:
-		svm_range_evict(prange, mni->mm, start, last);
+		svm_range_evict(prange, mni->mm, start, last, range->event);
 		break;
 	}
 
@@ -2588,14 +2671,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
 		last = addr;
 	}
 
-	prange = svm_range_new(&p->svms, start, last);
+	prange = svm_range_new(&p->svms, start, last, true);
 	if (!prange) {
 		pr_debug("Failed to create prange in address [0x%llx]\n", addr);
 		return NULL;
 	}
 	if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
 		pr_debug("failed to get gpuid from kgd\n");
-		svm_range_free(prange);
+		svm_range_free(prange, true);
 		return NULL;
 	}
 
@@ -2694,11 +2777,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct kfd_process *p;
-	uint64_t timestamp;
+	ktime_t timestamp = ktime_get_boottime();
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
+	bool migration = false;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2775,9 +2859,9 @@ retry_write_locked:
 		goto out_unlock_range;
 	}
 
-	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
 	/* skip duplicate vm fault on different pages of same range */
-	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+	if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+				AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
 		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
 			 svms, prange->start, prange->last);
 		r = 0;
@@ -2813,9 +2897,14 @@ retry_write_locked:
 		 svms, prange->start, prange->last, best_loc,
 		 prange->actual_loc);
 
+	kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr,
+				       write_fault, timestamp);
+
 	if (prange->actual_loc != best_loc) {
+		migration = true;
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm);
+			r = svm_migrate_to_vram(prange, best_loc, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
@@ -2823,12 +2912,14 @@ retry_write_locked:
 				 * VRAM failed
 				 */
 				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm);
+					r = svm_migrate_vram_to_ram(prange, mm,
+						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm);
+			r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
@@ -2842,6 +2933,9 @@ retry_write_locked:
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 			 r, svms, prange->start, prange->last);
 
+	kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr,
+				     migration);
+
 out_unlock_range:
 	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
@@ -2884,7 +2978,7 @@ void svm_range_list_fini(struct kfd_process *p)
 	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
 		svm_range_unlink(prange);
 		svm_range_remove_notifier(prange);
-		svm_range_free(prange);
+		svm_range_free(prange, true);
 	}
 
 	mutex_destroy(&p->svms.lock);
@@ -3148,12 +3242,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm);
+		r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm);
+	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3178,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
 	struct svm_range_bo *svm_bo;
-	struct kfd_process *p;
 	struct mm_struct *mm;
 	int r = 0;
 
@@ -3186,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	if (!svm_bo_ref_unless_zero(svm_bo))
 		return; /* svm_bo was freed while eviction was pending */
 
-	/* svm_range_bo_release destroys this worker thread. So during
-	 * the lifetime of this thread, kfd_process and mm will be valid.
-	 */
-	p = container_of(svm_bo->svms, struct kfd_process, svms);
-	mm = p->mm;
-	if (!mm)
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		mm = svm_bo->eviction_fence->mm;
+	} else {
+		svm_range_bo_unref(svm_bo);
 		return;
+	}
 
 	mmap_read_lock(mm);
 	spin_lock(&svm_bo->list_lock);
@@ -3210,8 +3302,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
 		mutex_lock(&prange->migrate_mutex);
 		do {
-			r = svm_migrate_vram_to_ram(prange,
-						svm_bo->eviction_fence->mm);
+			r = svm_migrate_vram_to_ram(prange, mm,
+						KFD_MIGRATE_TRIGGER_TTM_EVICTION);
 		} while (!r && prange->actual_loc && --retries);
 
 		if (!r && prange->actual_loc)
@@ -3228,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	}
 	spin_unlock(&svm_bo->list_lock);
 	mmap_read_unlock(mm);
+	mmput(mm);
 
 	dma_fence_signal(&svm_bo->eviction_fence->base);
 
@@ -3299,7 +3392,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 			 prange->last);
 		svm_range_unlink(prange);
 		svm_range_remove_notifier(prange);
-		svm_range_free(prange);
+		svm_range_free(prange, false);
 	}
 
 	mmap_write_downgrade(mm);
@@ -3317,7 +3410,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 		if (r)
 			goto out_unlock_range;
 
-		if (migrated && !p->xnack_enabled) {
+		if (migrated && (!p->xnack_enabled ||
+		    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
+		    prange->mapped_to_gpu) {
 			pr_debug("restore_work will update mappings of GPUs\n");
 			mutex_unlock(&prange->migrate_mutex);
 			continue;
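
Note on the new sizing heuristic: svm_range_set_max_pages() above derives max_svm_range_pages as 1/32 of the smallest device's VRAM, clamped between 2 MB and 1 GB worth of pages and rounded down to a power of two, and svm_range_split_new() then carves large registrations into ranges of at most that size. The following minimal userspace sketch reproduces only that arithmetic for illustration; it is not part of the patch, it assumes 4 KiB pages and a hypothetical 16 GB dGPU, and it open-codes stand-ins for the kernel's clamp() and rounddown_pow_of_two() helpers.

/* Illustrative sketch of the max_svm_range_pages arithmetic (not kernel code). */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's rounddown_pow_of_two(). */
static uint64_t rounddown_pow_of_two(uint64_t v)
{
	uint64_t p = 1;

	while (p * 2 <= v)
		p *= 2;
	return p;
}

int main(void)
{
	uint64_t real_vram_size = 16ULL << 30;	/* assumed 16 GB dGPU */
	uint64_t pages;

	pages = real_vram_size >> 17;		/* 1/32 of VRAM in 4 KiB pages */
	if (pages < (1ULL << 9))		/* clamp to [2 MB .. 1 GB] of pages */
		pages = 1ULL << 9;
	if (pages > (1ULL << 18))
		pages = 1ULL << 18;
	pages = rounddown_pow_of_two(pages);

	printf("max_svm_range_pages = %llu pages (%llu MB)\n",
	       (unsigned long long)pages,
	       (unsigned long long)(pages >> 8));	/* pages * 4 KiB / 1 MiB */
	return 0;
}

With these numbers, a 16 GB device yields 131072 pages (512 MB), so a 4 GB SVM registration would be split into eight 512 MB ranges instead of one giant range.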