Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  193
1 file changed, 91 insertions(+), 102 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index adf9b9ef8277..8f1de811a1dc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -433,35 +433,39 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
}
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
{
- spinlock_t *ptl;
- pgtable_t new = pte_alloc_one(mm);
- if (!new)
- return -ENOMEM;
+ spinlock_t *ptl = pmd_lock(mm, pmd);
- /*
- * Ensure all pte setup (eg. pte page lock and page clearing) are
- * visible before the pte is made visible to other CPUs by being
- * put into page tables.
- *
- * The other side of the story is the pointer chasing in the page
- * table walking code (when walking the page table without locking;
- * ie. most of the time). Fortunately, these data accesses consist
- * of a chain of data-dependent loads, meaning most CPUs (alpha
- * being the notable exception) will already guarantee loads are
- * seen in-order. See the alpha page table accessors for the
- * smp_rmb() barriers in page table walking code.
- */
- smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
-
- ptl = pmd_lock(mm, pmd);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
mm_inc_nr_ptes(mm);
- pmd_populate(mm, pmd, new);
- new = NULL;
+ /*
+ * Ensure all pte setup (eg. pte page lock and page clearing) are
+ * visible before the pte is made visible to other CPUs by being
+ * put into page tables.
+ *
+ * The other side of the story is the pointer chasing in the page
+ * table walking code (when walking the page table without locking;
+ * ie. most of the time). Fortunately, these data accesses consist
+ * of a chain of data-dependent loads, meaning most CPUs (alpha
+ * being the notable exception) will already guarantee loads are
+ * seen in-order. See the alpha page table accessors for the
+ * smp_rmb() barriers in page table walking code.
+ */
+ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
+ pmd_populate(mm, pmd, *pte);
+ *pte = NULL;
}
spin_unlock(ptl);
+}
+
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+{
+ pgtable_t new = pte_alloc_one(mm);
+ if (!new)
+ return -ENOMEM;
+
+ pmd_install(mm, pmd, &new);
if (new)
pte_free(mm, new);
return 0;
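
Note: pmd_install() takes the preallocated pte page by reference and consumes it only if it wins the race to populate the pmd; otherwise *pte is left non-NULL and the caller frees it. The caller-side pattern, as a minimal sketch mirroring __pte_alloc() above (illustrative only, not an additional part of the patch):

	pgtable_t new = pte_alloc_one(mm);	/* allocate outside any lock */
	if (!new)
		return -ENOMEM;

	pmd_install(mm, pmd, &new);	/* sets new to NULL if it installed the table */
	if (new)			/* lost the race: someone else populated the pmd */
		pte_free(mm, new);
	return 0;

finish_fault() below follows the same pattern with vmf->prealloc_pte.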
@@ -473,10 +477,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
if (!new)
return -ENOMEM;
- smp_wmb(); /* See comment in __pte_alloc */
-
spin_lock(&init_mm.page_table_lock);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
+ smp_wmb(); /* See comment in pmd_install() */
pmd_populate_kernel(&init_mm, pmd, new);
new = NULL;
}
@@ -990,7 +993,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
if (!new_page)
return NULL;
- if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) {
+ if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) {
put_page(new_page);
return NULL;
}
@@ -1333,16 +1336,8 @@ again:
struct page *page;
page = vm_normal_page(vma, addr, ptent);
- if (unlikely(details) && page) {
- /*
- * unmap_shared_mapping_pages() wants to
- * invalidate cache without truncating:
- * unmap shared but keep private pages.
- */
- if (details->check_mapping &&
- details->check_mapping != page_rmapping(page))
- continue;
- }
+ if (unlikely(zap_skip_check_mapping(details, page)))
+ continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
@@ -1375,17 +1370,8 @@ again:
is_device_exclusive_entry(entry)) {
struct page *page = pfn_swap_entry_to_page(entry);
- if (unlikely(details && details->check_mapping)) {
- /*
- * unmap_shared_mapping_pages() wants to
- * invalidate cache without truncating:
- * unmap shared but keep private pages.
- */
- if (details->check_mapping !=
- page_rmapping(page))
- continue;
- }
-
+ if (unlikely(zap_skip_check_mapping(details, page)))
+ continue;
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
rss[mm_counter(page)]--;
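
Note: the open-coded "unmap shared but keep private pages" checks removed above are replaced by zap_skip_check_mapping(), whose definition is not part of this file's diff. Reconstructed from the removed checks and the new zap_mapping field, it is presumably along these lines (a sketch, not the authoritative definition):

	static inline bool
	zap_skip_check_mapping(struct zap_details *details, struct page *page)
	{
		/* No details or no backing page: nothing to compare, do not skip. */
		if (!details || !page)
			return false;

		/*
		 * unmap_shared_mapping_pages() wants to invalidate cache
		 * without truncating: unmap shared but keep private pages.
		 */
		return details->zap_mapping &&
		       details->zap_mapping != page_rmapping(page);
	}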
@@ -2724,19 +2710,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
* proceeding (but do_wp_page is only called after already making such a check;
* and do_anonymous_page can safely check later on).
*/
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
- pte_t *page_table, pte_t orig_pte)
+static inline int pte_unmap_same(struct vm_fault *vmf)
{
int same = 1;
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
if (sizeof(pte_t) > sizeof(unsigned long)) {
- spinlock_t *ptl = pte_lockptr(mm, pmd);
+ spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
spin_lock(ptl);
- same = pte_same(*page_table, orig_pte);
+ same = pte_same(*vmf->pte, vmf->orig_pte);
spin_unlock(ptl);
}
#endif
- pte_unmap(page_table);
+ pte_unmap(vmf->pte);
+ vmf->pte = NULL;
return same;
}
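
Note: pte_unmap_same() now takes the vm_fault directly and, after unmapping, clears vmf->pte so no caller is left holding a stale pte pointer. Caller-side usage stays a one-liner, as in the do_swap_page() hunk further down (sketch):

	if (!pte_unmap_same(vmf))
		goto out;
	/* vmf->pte is NULL here; the helper already unmapped it. */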
@@ -3019,7 +3005,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
}
- if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL))
goto oom_free_new;
cgroup_throttle_swaprate(new_page, GFP_KERNEL);
@@ -3321,20 +3307,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
}
static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+ pgoff_t first_index,
+ pgoff_t last_index,
struct zap_details *details)
{
struct vm_area_struct *vma;
pgoff_t vba, vea, zba, zea;
- vma_interval_tree_foreach(vma, root,
- details->first_index, details->last_index) {
-
+ vma_interval_tree_foreach(vma, root, first_index, last_index) {
vba = vma->vm_pgoff;
vea = vba + vma_pages(vma) - 1;
- zba = details->first_index;
+ zba = first_index;
if (zba < vba)
zba = vba;
- zea = details->last_index;
+ zea = last_index;
if (zea > vea)
zea = vea;
@@ -3360,18 +3346,22 @@ void unmap_mapping_page(struct page *page)
{
struct address_space *mapping = page->mapping;
struct zap_details details = { };
+ pgoff_t first_index;
+ pgoff_t last_index;
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(PageTail(page));
- details.check_mapping = mapping;
- details.first_index = page->index;
- details.last_index = page->index + thp_nr_pages(page) - 1;
+ first_index = page->index;
+ last_index = page->index + thp_nr_pages(page) - 1;
+
+ details.zap_mapping = mapping;
details.single_page = page;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
- unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+ last_index, &details);
i_mmap_unlock_write(mapping);
}
@@ -3391,16 +3381,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
pgoff_t nr, bool even_cows)
{
struct zap_details details = { };
+ pgoff_t first_index = start;
+ pgoff_t last_index = start + nr - 1;
- details.check_mapping = even_cows ? NULL : mapping;
- details.first_index = start;
- details.last_index = start + nr - 1;
- if (details.last_index < details.first_index)
- details.last_index = ULONG_MAX;
+ details.zap_mapping = even_cows ? NULL : mapping;
+ if (last_index < first_index)
+ last_index = ULONG_MAX;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
- unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+ last_index, &details);
i_mmap_unlock_write(mapping);
}
EXPORT_SYMBOL_GPL(unmap_mapping_pages);
@@ -3488,7 +3479,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vm_fault_t ret = 0;
void *shadow = NULL;
- if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
+ if (!pte_unmap_same(vmf))
goto out;
entry = pte_to_swp_entry(vmf->orig_pte);
@@ -3539,7 +3530,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
shadow = get_shadow_from_swap_cache(entry);
if (shadow)
- workingset_refault(page, shadow);
+ workingset_refault(page_folio(page),
+ shadow);
lru_cache_add(page);
@@ -3769,7 +3761,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (!page)
goto oom;
- if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+ if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
goto oom_free_page;
cgroup_throttle_swaprate(page, GFP_KERNEL);
@@ -3852,7 +3844,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
- smp_wmb(); /* See comment in __pte_alloc() */
}
ret = vma->vm_ops->fault(vmf);
@@ -3907,6 +3898,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
return ret;
/*
+ * Just backoff if any subpage of a THP is corrupted otherwise
+ * the corrupted page may mapped by PMD silently to escape the
+ * check. This kind of THP just can be PTE mapped. Access to
+ * the corrupted subpage should trigger SIGBUS as expected.
+ */
+ if (unlikely(PageHasHWPoisoned(page)))
+ return ret;
+
+ /*
* Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that.
*/
@@ -3914,7 +3914,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
- smp_wmb(); /* See comment in __pte_alloc() */
}
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -4027,17 +4026,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}
- if (vmf->prealloc_pte) {
- vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
- if (likely(pmd_none(*vmf->pmd))) {
- mm_inc_nr_ptes(vma->vm_mm);
- pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
- vmf->prealloc_pte = NULL;
- }
- spin_unlock(vmf->ptl);
- } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
+ if (vmf->prealloc_pte)
+ pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
+ else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
return VM_FAULT_OOM;
- }
}
/* See comment in handle_pte_fault() */
@@ -4146,7 +4138,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
- smp_wmb(); /* See comment in __pte_alloc() */
}
return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
@@ -4193,7 +4184,8 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
if (!vmf->cow_page)
return VM_FAULT_OOM;
- if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) {
+ if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm,
+ GFP_KERNEL)) {
put_page(vmf->cow_page);
return VM_FAULT_OOM;
}
@@ -4258,7 +4250,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
* We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults).
* The mmap_lock may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
+ * return value. See filemap_fault() and __folio_lock_or_retry().
* If mmap_lock is released, vma may become invalid (for example
* by other thread calling munmap()).
*/
@@ -4499,7 +4491,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
* concurrent faults).
*
* The mmap_lock may have been released depending on flags and our return value.
- * See filemap_fault() and __lock_page_or_retry().
+ * See filemap_fault() and __folio_lock_or_retry().
*/
static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
{
@@ -4603,7 +4595,7 @@ unlock:
* By the time we get here, we already hold the mm semaphore
*
* The mmap_lock may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
+ * return value. See filemap_fault() and __folio_lock_or_retry().
*/
static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -4759,7 +4751,7 @@ static inline void mm_account_fault(struct pt_regs *regs,
* By the time we get here, we already hold the mm semaphore
*
* The mmap_lock may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
+ * return value. See filemap_fault() and __folio_lock_or_retry().
*/
vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
unsigned int flags, struct pt_regs *regs)
@@ -4820,13 +4812,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
if (!new)
return -ENOMEM;
- smp_wmb(); /* See comment in __pte_alloc */
-
spin_lock(&mm->page_table_lock);
- if (pgd_present(*pgd)) /* Another has populated it */
+ if (pgd_present(*pgd)) { /* Another has populated it */
p4d_free(mm, new);
- else
+ } else {
+ smp_wmb(); /* See comment in pmd_install() */
pgd_populate(mm, pgd, new);
+ }
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -4843,11 +4835,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
if (!new)
return -ENOMEM;
- smp_wmb(); /* See comment in __pte_alloc */
-
spin_lock(&mm->page_table_lock);
if (!p4d_present(*p4d)) {
mm_inc_nr_puds(mm);
+ smp_wmb(); /* See comment in pmd_install() */
p4d_populate(mm, p4d, new);
} else /* Another has populated it */
pud_free(mm, new);
@@ -4868,14 +4859,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
if (!new)
return -ENOMEM;
- smp_wmb(); /* See comment in __pte_alloc */
-
ptl = pud_lock(mm, pud);
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
+ smp_wmb(); /* See comment in pmd_install() */
pud_populate(mm, pud, new);
- } else /* Another has populated it */
+ } else { /* Another has populated it */
pmd_free(mm, new);
+ }
spin_unlock(ptl);
return 0;
}
@@ -5256,7 +5247,7 @@ void __might_fault(const char *file, int line)
return;
if (pagefault_disabled())
return;
- __might_sleep(file, line, 0);
+ __might_sleep(file, line);
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
if (current->mm)
might_lock_read(&current->mm->mmap_lock);
@@ -5412,7 +5403,6 @@ long copy_huge_page_from_user(struct page *dst_page,
unsigned int pages_per_huge_page,
bool allow_pagefault)
{
- void *src = (void *)usr_src;
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
@@ -5425,8 +5415,7 @@ long copy_huge_page_from_user(struct page *dst_page,
else
page_kaddr = kmap_atomic(subpage);
rc = copy_from_user(page_kaddr,
- (const void __user *)(src + i * PAGE_SIZE),
- PAGE_SIZE);
+ usr_src + i * PAGE_SIZE, PAGE_SIZE);
if (allow_pagefault)
kunmap(subpage);
else