aboutsummaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c75
1 files changed, 52 insertions, 23 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 4bcd79619574..ebfc9768f801 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2632,11 +2632,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
-/*
- * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
- * must have pre-validated the caching bits of the pgprot_t.
- */
-int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
pgd_t *pgd;
@@ -2689,6 +2685,27 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
return 0;
}
+/*
+ * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
+ * must have pre-validated the caching bits of the pgprot_t.
+ */
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ int error = remap_pfn_range_internal(vma, addr, pfn, size, prot);
+
+ if (!error)
+ return 0;
+
+ /*
+ * A partial pfn range mapping is dangerous: it does not
+ * maintain page reference counts, and callers may free
+ * pages due to the error. So zap it early.
+ */
+ zap_page_range_single(vma, addr, size, NULL);
+ return error;
+}
+
/**
* remap_pfn_range - remap kernel memory to userspace
* @vma: user vma to map to
@@ -4780,7 +4797,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
- bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
+ bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE);
pte_t entry;
flush_icache_pages(vma, page, nr);
@@ -5295,7 +5312,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
- goto out;
+ return 0;
}
pte = pte_modify(old_pte, vma->vm_page_prot);
@@ -5358,23 +5375,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (!migrate_misplaced_folio(folio, vma, target_nid)) {
nid = target_nid;
flags |= TNF_MIGRATED;
- } else {
- flags |= TNF_MIGRATE_FAIL;
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
- vmf->address, &vmf->ptl);
- if (unlikely(!vmf->pte))
- goto out;
- if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
- pte_unmap_unlock(vmf->pte, vmf->ptl);
- goto out;
- }
- goto out_map;
+ task_numa_fault(last_cpupid, nid, nr_pages, flags);
+ return 0;
}
-out:
- if (nid != NUMA_NO_NODE)
- task_numa_fault(last_cpupid, nid, nr_pages, flags);
- return 0;
+ flags |= TNF_MIGRATE_FAIL;
+ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+ vmf->address, &vmf->ptl);
+ if (unlikely(!vmf->pte))
+ return 0;
+ if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return 0;
+ }
out_map:
/*
* Make it present again, depending on how arch implements
@@ -5387,7 +5400,10 @@ out_map:
numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
writable);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- goto out;
+
+ if (nid != NUMA_NO_NODE)
+ task_numa_fault(last_cpupid, nid, nr_pages, flags);
+ return 0;
}
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
@@ -5801,6 +5817,7 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* If the fault handler drops the mmap_lock, vma may be freed */
struct mm_struct *mm = vma->vm_mm;
vm_fault_t ret;
+ bool is_droppable;
__set_current_state(TASK_RUNNING);
@@ -5815,6 +5832,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
+ is_droppable = !!(vma->vm_flags & VM_DROPPABLE);
+
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
@@ -5829,8 +5848,18 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
else
ret = __handle_mm_fault(vma, address, flags);
+ /*
+ * Warning: It is no longer safe to dereference vma-> after this point,
+ * because mmap_lock might have been dropped by __handle_mm_fault(), so
+ * vma might be destroyed from underneath us.
+ */
+
lru_gen_exit_fault();
+ /* If the mapping is droppable, then errors due to OOM aren't fatal. */
+ if (is_droppable)
+ ret &= ~VM_FAULT_OOM;
+
if (flags & FAULT_FLAG_USER) {
mem_cgroup_exit_user_fault();
/*