diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 36 | ||||
-rw-r--r-- | mm/memory.c | 47 | ||||
-rw-r--r-- | mm/truncate.c | 75 | ||||
-rw-r--r-- | mm/workingset.c | 3 |
4 files changed, 114 insertions, 47 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 82f26cde830c..d0e4d1002059 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_lock. + * + * On x86 (and on many other architectures), we can clear PG_lock and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unneccssary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters, mem); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); diff --git a/mm/memory.c b/mm/memory.c index 7d23b5050248..9f2c15cdb32c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) ret = 0; count_vm_event(THP_FILE_MAPPED); out: - /* - * If we are going to fallback to pte mapping, do a - * withdraw with pmd lock held. - */ - if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) - vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, - vmf->pmd); spin_unlock(vmf->ptl); return ret; } @@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) - goto fault_handled; + return ret; } if (!vmf->pte) { ret = pte_alloc_one_map(vmf); if (ret) - goto fault_handled; + return ret; } /* Re-check under ptl */ - if (unlikely(!pte_none(*vmf->pte))) { - ret = VM_FAULT_NOPAGE; - goto fault_handled; - } + if (unlikely(!pte_none(*vmf->pte))) + return VM_FAULT_NOPAGE; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); @@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, vmf->address, vmf->pte); - ret = 0; -fault_handled: - /* preallocated pagetable is unused: free it */ - if (vmf->prealloc_pte) { - pte_free(vmf->vma->vm_mm, vmf->prealloc_pte); - vmf->prealloc_pte = 0; - } - return ret; + return 0; } @@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf) static int do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; + int ret; /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) - return VM_FAULT_SIGBUS; - if (!(vmf->flags & FAULT_FLAG_WRITE)) - return do_read_fault(vmf); - if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(vmf); - return do_shared_fault(vmf); + ret = VM_FAULT_SIGBUS; + else if (!(vmf->flags & FAULT_FLAG_WRITE)) + ret = do_read_fault(vmf); + else if (!(vma->vm_flags & VM_SHARED)) + ret = do_cow_fault(vmf); + else + ret = do_shared_fault(vmf); + + /* preallocated pagetable is unused: free it */ + if (vmf->prealloc_pte) { + pte_free(vma->vm_mm, vmf->prealloc_pte); + vmf->prealloc_pte = 0; + } + return ret; } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, diff --git a/mm/truncate.c b/mm/truncate.c index fd97f1dbce29..dd7b24e083c5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -24,20 +24,12 @@ #include <linux/rmap.h> #include "internal.h" -static void clear_exceptional_entry(struct address_space *mapping, - pgoff_t index, void *entry) +static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, + void *entry) { struct radix_tree_node *node; void **slot; - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) - return; - - if (dax_mapping(mapping)) { - dax_delete_mapping_entry(mapping, index); - return; - } spin_lock_irq(&mapping->tree_lock); /* * Regular page slots are stabilized by the page lock even @@ -55,6 +47,56 @@ unlock: spin_unlock_irq(&mapping->tree_lock); } +/* + * Unconditionally remove exceptional entry. Usually called from truncate path. + */ +static void truncate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + if (dax_mapping(mapping)) { + dax_delete_mapping_entry(mapping, index); + return; + } + clear_shadow_entry(mapping, index, entry); +} + +/* + * Invalidate exceptional entry if easily possible. This handles exceptional + * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and + * clean entries. + */ +static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + +/* + * Invalidate exceptional entry if clean. This handles exceptional entries for + * invalidate_inode_pages2() so for DAX it evicts only clean entries. + */ +static int invalidate_exceptional_entry2(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry_sync(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + /** * do_invalidatepage - invalidate part or all of a page * @page: the page which is affected @@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, } if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + invalidate_exceptional_entry(mapping, index, + page); continue; } @@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + if (!invalidate_exceptional_entry2(mapping, + index, page)) + ret = -EBUSY; continue; } diff --git a/mm/workingset.c b/mm/workingset.c index 241fa5d6b3b2..abb58ffa3c64 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -473,7 +473,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); - __radix_tree_delete_node(&mapping->page_tree, node); + __radix_tree_delete_node(&mapping->page_tree, node, + workingset_update_node, mapping); out_invalid: spin_unlock(&mapping->tree_lock); |