aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c52
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c156
7 files changed, 163 insertions, 64 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0fffaf859c59..3b119db16003 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -406,7 +406,7 @@ static const struct kfd_device_info aldebaran_device_info = {
static const struct kfd_device_info renoir_device_info = {
.asic_family = CHIP_RENOIR,
.asic_name = "renoir",
- .gfx_target_version = 90002,
+ .gfx_target_version = 90012,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 533b27b35fc9..93e33dd84dd4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1226,6 +1226,11 @@ static int stop_cpsch(struct device_queue_manager *dqm)
bool hanging;
dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
if (!dqm->is_hws_hang)
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
hanging = dqm->is_hws_hang || dqm->is_resetting;
@@ -1430,7 +1435,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->sched_running)
return 0;
- if (dqm->is_hws_hang)
+ if (dqm->is_hws_hang || dqm->is_resetting)
return -EIO;
if (!dqm->active_runlist)
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 2e86692def19..d1388896f9c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -308,7 +308,7 @@
* 16MB are reserved for kernel use (CWSR trap handler and kernel IB
* for now).
*/
-#define SVM_USER_BASE 0x1000000ull
+#define SVM_USER_BASE (u64)(KFD_CWSR_TBA_TMA_SIZE + 2*PAGE_SIZE)
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6d8634e40b3b..9b9c2b9bf2ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -281,6 +281,19 @@ static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
return cpages;
}
+static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+{
+ unsigned long upages = 0;
+ unsigned long i;
+
+ for (i = 0; i < migrate->npages; i++) {
+ if (migrate->src[i] & MIGRATE_PFN_VALID &&
+ !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ upages++;
+ }
+ return upages;
+}
+
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
@@ -317,7 +330,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
- migrate->dst[i] |= MIGRATE_PFN_LOCKED;
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
@@ -610,7 +622,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
- migrate->dst[i] |= MIGRATE_PFN_LOCKED;
j++;
}
@@ -634,10 +645,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
+ unsigned long upages = npages;
+ unsigned long cpages = 0;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
- unsigned long cpages = 0;
dma_addr_t *scratch;
size_t size;
void *buf;
@@ -671,6 +683,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
if (!cpages) {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
+ upages = svm_migrate_unsuccessful_pages(&migrate);
goto out_free;
}
if (cpages != npages)
@@ -683,8 +696,9 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
scratch, npages);
migrate_vma_pages(&migrate);
- pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
+ upages = svm_migrate_unsuccessful_pages(&migrate);
+ pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+ upages, cpages, migrate.npages);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
@@ -698,9 +712,9 @@ out:
if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
- return cpages;
+ return upages;
}
- return r;
+ return r ? r : upages;
}
/**
@@ -720,7 +734,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
unsigned long addr;
unsigned long start;
unsigned long end;
- unsigned long cpages = 0;
+ unsigned long upages = 0;
long r = 0;
if (!prange->actual_loc) {
@@ -756,12 +770,12 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
pr_debug("failed %ld to migrate\n", r);
break;
} else {
- cpages += r;
+ upages += r;
}
addr = next;
}
- if (cpages) {
+ if (!upages) {
svm_range_vram_node_free(prange);
prange->actual_loc = 0;
}
@@ -784,7 +798,7 @@ static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm)
{
- int r;
+ int r, retries = 3;
/*
* TODO: for both devices with PCIe large bar or on same xgmi hive, skip
@@ -793,9 +807,14 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
- r = svm_migrate_vram_to_ram(prange, mm);
- if (r)
- return r;
+ do {
+ r = svm_migrate_vram_to_ram(prange, mm);
+ if (r)
+ return r;
+ } while (prange->actual_loc && --retries);
+
+ if (prange->actual_loc)
+ return -EDEADLK;
return svm_migrate_ram_to_vram(prange, best_loc, mm);
}
@@ -840,6 +859,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
pr_debug("failed find process at fault address 0x%lx\n", addr);
return VM_FAULT_SIGBUS;
}
+ if (READ_ONCE(p->svms.faulting_task) == current) {
+ pr_debug("skipping ram migration\n");
+ kfd_unref_process(p);
+ return 0;
+ }
addr >>= PAGE_SHIFT;
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4104b167e721..8fd48d0ed240 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -766,8 +766,10 @@ struct svm_range_list {
struct list_head deferred_range_list;
spinlock_t deferred_list_lock;
atomic_t evicted_ranges;
+ atomic_t drain_pagefaults;
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
+ struct task_struct *faulting_task;
};
/* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 457863861d6f..b993011cfa64 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1715,7 +1715,11 @@ int kfd_process_evict_queues(struct kfd_process *p)
r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
&pdd->qpd);
- if (r) {
+ /* evict return -EIO if HWS is hang or asic is resetting, in this case
+ * we would like to set all the queues to be in evicted state to prevent
+ * them been add back since they actually not be saved right now.
+ */
+ if (r && r != -EIO) {
pr_err("Failed to evict process queues\n");
goto fail;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b691c8495d66..3cb4681c5f53 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1496,9 +1496,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
next = min(vma->vm_end, end);
npages = (next - addr) >> PAGE_SHIFT;
+ WRITE_ONCE(p->svms.faulting_task, current);
r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
addr, npages, &hmm_range,
readonly, true, owner);
+ WRITE_ONCE(p->svms.faulting_task, NULL);
if (r) {
pr_debug("failed %d to get svm range pages\n", r);
goto unreserve_out;
@@ -1572,7 +1574,6 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
- struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@@ -1592,12 +1593,10 @@ static void svm_range_restore_work(struct work_struct *work)
* the lifetime of this thread, kfd_process and mm will be valid.
*/
p = container_of(svms, struct kfd_process, svms);
- process_info = p->kgd_process_info;
mm = p->mm;
if (!mm)
return;
- mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@@ -1650,7 +1649,6 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
- mutex_unlock(&process_info->lock);
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
@@ -1966,10 +1964,16 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
struct kfd_process_device *pdd;
struct amdgpu_device *adev;
struct kfd_process *p;
+ int drain;
uint32_t i;
p = container_of(svms, struct kfd_process, svms);
+restart:
+ drain = atomic_read(&svms->drain_pagefaults);
+ if (!drain)
+ return;
+
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
if (!pdd)
@@ -1981,6 +1985,8 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
+ if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+ goto restart;
}
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -1988,35 +1994,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)
struct svm_range_list *svms;
struct svm_range *prange;
struct mm_struct *mm;
+ struct kfd_process *p;
svms = container_of(work, struct svm_range_list, deferred_list_work);
pr_debug("enter svms 0x%p\n", svms);
+ p = container_of(svms, struct kfd_process, svms);
+ /* Avoid mm is gone when inserting mmu notifier */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ pr_debug("svms 0x%p process mm gone\n", svms);
+ return;
+ }
+retry:
+ mmap_write_lock(mm);
+
+ /* Checking for the need to drain retry faults must be inside
+ * mmap write lock to serialize with munmap notifiers.
+ */
+ if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+ mmap_write_unlock(mm);
+ svm_range_drain_retry_fault(svms);
+ goto retry;
+ }
+
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
+ list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
+
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
- /* Make sure no stale retry fault coming after range is freed */
- if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
- svm_range_drain_retry_fault(prange->svms);
-
- mm = prange->work_item.mm;
- mmap_write_lock(mm);
mutex_lock(&svms->lock);
-
- /* Remove from deferred_list must be inside mmap write lock,
- * otherwise, svm_range_list_lock_and_flush_work may hold mmap
- * write lock, and continue because deferred_list is empty, then
- * deferred_list handle is blocked by mmap write lock.
- */
- spin_lock(&svms->deferred_list_lock);
- list_del_init(&prange->deferred_list);
- spin_unlock(&svms->deferred_list_lock);
-
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
struct svm_range *pchild;
@@ -2032,12 +2044,13 @@ static void svm_range_deferred_list_work(struct work_struct *work)
svm_range_handle_list_op(svms, prange);
mutex_unlock(&svms->lock);
- mmap_write_unlock(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
+ mmap_write_unlock(mm);
+ mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
@@ -2124,6 +2137,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last, start, last);
+ /* Make sure pending page faults are drained in the deferred worker
+ * before the range is freed to avoid straggler interrupts on
+ * unmapped memory causing "phantom faults".
+ */
+ atomic_inc(&svms->drain_pagefaults);
+
unmap_parent = start <= prange->start && last >= prange->last;
list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2261,7 +2280,7 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
* migration if actual loc is not best location, then update GPU page table
* mapping to the best location.
*
- * If vm fault gpu is range preferred loc, the best_loc is preferred loc.
+ * If the preferred loc is accessible by faulting GPU, use preferred loc.
* If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu
* If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then
* if range actual loc is cpu, best_loc is cpu
@@ -2278,7 +2297,7 @@ svm_range_best_restore_location(struct svm_range *prange,
struct amdgpu_device *adev,
int32_t *gpuidx)
{
- struct amdgpu_device *bo_adev;
+ struct amdgpu_device *bo_adev, *preferred_adev;
struct kfd_process *p;
uint32_t gpuid;
int r;
@@ -2291,8 +2310,16 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
- if (prange->preferred_loc == gpuid)
+ if (prange->preferred_loc == gpuid ||
+ prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
return prange->preferred_loc;
+ } else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+ preferred_adev = svm_range_get_adev_by_id(prange,
+ prange->preferred_loc);
+ if (amdgpu_xgmi_same_hive(adev, preferred_adev))
+ return prange->preferred_loc;
+ /* fall through */
+ }
if (test_bit(*gpuidx, prange->bitmap_access))
return gpuid;
@@ -2313,7 +2340,8 @@ svm_range_best_restore_location(struct svm_range *prange,
static int
svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
- unsigned long *start, unsigned long *last)
+ unsigned long *start, unsigned long *last,
+ bool *is_heap_stack)
{
struct vm_area_struct *vma;
struct interval_tree_node *node;
@@ -2324,6 +2352,12 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
pr_debug("VMA does not exist in address [0x%llx]\n", addr);
return -EFAULT;
}
+
+ *is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
+ vma->vm_end >= vma->vm_mm->start_brk) ||
+ (vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack);
+
start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
@@ -2353,9 +2387,9 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*start = start_limit;
*last = end_limit - 1;
- pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
- vma->vm_start >> PAGE_SHIFT, *start,
- vma->vm_end >> PAGE_SHIFT, *last);
+ pr_debug("vma [0x%lx 0x%lx] range [0x%lx 0x%lx] is_heap_stack %d\n",
+ vma->vm_start >> PAGE_SHIFT, vma->vm_end >> PAGE_SHIFT,
+ *start, *last, *is_heap_stack);
return 0;
}
@@ -2420,11 +2454,13 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
struct svm_range *prange = NULL;
unsigned long start, last;
uint32_t gpuid, gpuidx;
+ bool is_heap_stack;
uint64_t bo_s = 0;
uint64_t bo_l = 0;
int r;
- if (svm_range_get_range_boundaries(p, addr, &start, &last))
+ if (svm_range_get_range_boundaries(p, addr, &start, &last,
+ &is_heap_stack))
return NULL;
r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
@@ -2451,6 +2487,9 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
return NULL;
}
+ if (is_heap_stack)
+ prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
+
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
@@ -2523,20 +2562,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
}
static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
{
unsigned long requested = VM_READ;
- struct vm_area_struct *vma;
if (write_fault)
requested |= VM_WRITE;
- vma = find_vma(mm, addr << PAGE_SHIFT);
- if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
- pr_debug("address 0x%llx VMA is removed\n", addr);
- return true;
- }
-
pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
vma->vm_flags);
return (vma->vm_flags & requested) == requested;
@@ -2554,6 +2586,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
int32_t best_loc;
int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
+ struct vm_area_struct *vma;
int r = 0;
if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2564,7 +2597,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
p = kfd_lookup_process_by_pasid(pasid);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
- return -ESRCH;
+ return 0;
}
if (!p->xnack_enabled) {
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
@@ -2575,10 +2608,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+ if (atomic_read(&svms->drain_pagefaults)) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ r = 0;
+ goto out;
+ }
+
+ /* p->lead_thread is available as kfd_process_wq_release flush the work
+ * before releasing task ref.
+ */
mm = get_task_mm(p->lead_thread);
if (!mm) {
pr_debug("svms 0x%p failed to get mm\n", svms);
- r = -ESRCH;
+ r = 0;
goto out;
}
@@ -2616,6 +2658,7 @@ retry_write_locked:
if (svm_range_skip_recover(prange)) {
amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+ r = 0;
goto out_unlock_range;
}
@@ -2624,10 +2667,21 @@ retry_write_locked:
if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
svms, prange->start, prange->last);
+ r = 0;
+ goto out_unlock_range;
+ }
+
+ /* __do_munmap removed VMA, return success as we are handling stale
+ * retry fault.
+ */
+ vma = find_vma(mm, addr << PAGE_SHIFT);
+ if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+ pr_debug("address 0x%llx VMA is removed\n", addr);
+ r = 0;
goto out_unlock_range;
}
- if (!svm_fault_allowed(mm, addr, write_fault)) {
+ if (!svm_fault_allowed(vma, write_fault)) {
pr_debug("fault addr 0x%llx no %s permission\n", addr,
write_fault ? "write" : "read");
r = -EPERM;
@@ -2705,6 +2759,14 @@ void svm_range_list_fini(struct kfd_process *p)
/* Ensure list work is finished before process is destroyed */
flush_work(&p->svms.deferred_list_work);
+ /*
+ * Ensure no retry fault comes in afterwards, as page fault handler will
+ * not find kfd process and take mm lock to recover fault.
+ */
+ atomic_inc(&p->svms.drain_pagefaults);
+ svm_range_drain_retry_fault(&p->svms);
+
+
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
svm_range_unlink(prange);
svm_range_remove_notifier(prange);
@@ -2725,6 +2787,7 @@ int svm_range_list_init(struct kfd_process *p)
mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list);
atomic_set(&svms->evicted_ranges, 0);
+ atomic_set(&svms->drain_pagefaults, 0);
INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
@@ -3076,6 +3139,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
struct svm_range *prange =
list_first_entry(&svm_bo->range_list,
struct svm_range, svm_bo_list);
+ int retries = 3;
+
list_del_init(&prange->svm_bo_list);
spin_unlock(&svm_bo->list_lock);
@@ -3083,7 +3148,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
prange->start, prange->last);
mutex_lock(&prange->migrate_mutex);
- svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+ do {
+ svm_migrate_vram_to_ram(prange,
+ svm_bo->eviction_fence->mm);
+ } while (prange->actual_loc && --retries);
+ WARN(prange->actual_loc, "Migration failed during eviction");
mutex_lock(&prange->lock);
prange->svm_bo = NULL;
@@ -3108,7 +3177,6 @@ static int
svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
{
- struct amdkfd_process_info *process_info = p->kgd_process_info;
struct mm_struct *mm = current->mm;
struct list_head update_list;
struct list_head insert_list;
@@ -3127,8 +3195,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svms = &p->svms;
- mutex_lock(&process_info->lock);
-
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@@ -3204,8 +3270,6 @@ out_unlock_range:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
- mutex_unlock(&process_info->lock);
-
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);