aboutsummaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c86
1 files changed, 57 insertions, 29 deletions
diff --git a/mm/memory.c b/mm/memory.c
index d67fd9fcf1f2..5c55270729f7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
return 0;
}
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
return VM_FAULT_OOM;
}
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_retry().
+ */
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
{
@@ -2744,7 +2752,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- pte_mksoft_dirty(entry);
+ entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
@@ -2758,22 +2766,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
-static unsigned long fault_around_bytes = 65536;
-
-/*
- * fault_around_pages() and fault_around_mask() round down fault_around_bytes
- * to nearest page order. It's what do_fault_around() expects to see.
- */
-static inline unsigned long fault_around_pages(void)
-{
- return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE;
-}
-
-static inline unsigned long fault_around_mask(void)
-{
- return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK;
-}
-
+static unsigned long fault_around_bytes __read_mostly =
+ rounddown_pow_of_two(65536);
#ifdef CONFIG_DEBUG_FS
static int fault_around_bytes_get(void *data, u64 *val)
@@ -2782,11 +2776,19 @@ static int fault_around_bytes_get(void *data, u64 *val)
return 0;
}
+/*
+ * fault_around_pages() and fault_around_mask() expects fault_around_bytes
+ * rounded down to nearest page order. It's what do_fault_around() expects to
+ * see.
+ */
static int fault_around_bytes_set(void *data, u64 val)
{
if (val / PAGE_SIZE > PTRS_PER_PTE)
return -EINVAL;
- fault_around_bytes = val;
+ if (val > PAGE_SIZE)
+ fault_around_bytes = rounddown_pow_of_two(val);
+ else
+ fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops,
@@ -2831,12 +2833,15 @@ late_initcall(fault_around_debugfs);
static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, pgoff_t pgoff, unsigned int flags)
{
- unsigned long start_addr;
+ unsigned long start_addr, nr_pages, mask;
pgoff_t max_pgoff;
struct vm_fault vmf;
int off;
- start_addr = max(address & fault_around_mask(), vma->vm_start);
+ nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+ mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
+
+ start_addr = max(address & mask, vma->vm_start);
off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
pte -= off;
pgoff -= off;
@@ -2848,7 +2853,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1;
max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
- pgoff + fault_around_pages() - 1);
+ pgoff + nr_pages - 1);
/* Check if it makes any sense to call ->map_pages */
while (!pte_none(*pte)) {
@@ -2882,7 +2887,8 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* if page by the offset is not ready to be mapped (cold cache or
* something).
*/
- if (vma->vm_ops->map_pages && fault_around_pages() > 1) {
+ if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
+ fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
@@ -3012,6 +3018,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
@@ -3036,7 +3048,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3168,7 +3182,10 @@ out:
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
@@ -3177,7 +3194,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry;
spinlock_t *ptl;
- entry = *pte;
+ entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
@@ -3228,6 +3245,9 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -3309,6 +3329,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
@@ -3587,11 +3613,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0) {
+#ifndef CONFIG_HAVE_IOREMAP_PROT
+ break;
+#else
/*
* Check if this is a VM_IO | VM_PFNMAP VMA, which
* we can access using slightly different code.
*/
-#ifdef CONFIG_HAVE_IOREMAP_PROT
vma = find_vma(mm, addr);
if (!vma || vma->vm_start > addr)
break;
@@ -3599,9 +3627,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
ret = vma->vm_ops->access(vma, addr, buf,
len, write);
if (ret <= 0)
-#endif
break;
bytes = ret;
+#endif
} else {
bytes = len;
offset = addr & (PAGE_SIZE-1);