diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 1 | ||||
-rw-r--r-- | mm/debug.c | 4 | ||||
-rw-r--r-- | mm/gup.c | 3 | ||||
-rw-r--r-- | mm/hugetlb.c | 19 | ||||
-rw-r--r-- | mm/kasan/Makefile | 3 | ||||
-rw-r--r-- | mm/kasan/common.c | 29 | ||||
-rw-r--r-- | mm/kasan/tags.c | 2 | ||||
-rw-r--r-- | mm/kmemleak.c | 10 | ||||
-rw-r--r-- | mm/maccess.c | 6 | ||||
-rw-r--r-- | mm/memblock.c | 11 | ||||
-rw-r--r-- | mm/memory-failure.c | 3 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 85 | ||||
-rw-r--r-- | mm/mempolicy.c | 6 | ||||
-rw-r--r-- | mm/migrate.c | 23 | ||||
-rw-r--r-- | mm/mincore.c | 94 | ||||
-rw-r--r-- | mm/mmap.c | 7 | ||||
-rw-r--r-- | mm/oom_kill.c | 12 | ||||
-rw-r--r-- | mm/page_alloc.c | 32 | ||||
-rw-r--r-- | mm/page_ext.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 12 | ||||
-rw-r--r-- | mm/slab.c | 15 | ||||
-rw-r--r-- | mm/slab.h | 7 | ||||
-rw-r--r-- | mm/slab_common.c | 3 | ||||
-rw-r--r-- | mm/slub.c | 59 | ||||
-rw-r--r-- | mm/swap.c | 17 | ||||
-rw-r--r-- | mm/util.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 10 |
27 files changed, 304 insertions, 175 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8a8bb8796c6c..72e6d0c55cfa 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -689,6 +689,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; mutex_init(&bdi->cgwb_release_mutex); + init_rwsem(&bdi->wb_switch_rwsem); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { diff --git a/mm/debug.c b/mm/debug.c index 0abb987dad9b..1611cf00a137 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -44,7 +44,7 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; bool page_poisoned = PagePoisoned(page); int mapcount; @@ -58,6 +58,8 @@ void __dump_page(struct page *page, const char *reason) goto hex_only; } + mapping = page_mapping(page); + /* * Avoid VM_BUG_ON() in page_mapcount(). * page->_mapcount space in struct page is used by sl[aou]b pages to @@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (!pmd_present(pmd)) return 0; - if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { + if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || + pmd_devmap(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df2e7dd5ff17..8dfdffc34a99 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3624,7 +3624,6 @@ retry_avoidcopy: copy_user_huge_page(new_page, old_page, address, vma, pages_per_huge_page(h)); __SetPageUptodate(new_page); - set_page_huge_active(new_page); mmu_notifier_range_init(&range, mm, haddr, haddr + huge_page_size(h)); mmu_notifier_invalidate_range_start(&range); @@ -3645,6 +3644,7 @@ retry_avoidcopy: make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page, true); hugepage_add_new_anon_rmap(new_page, vma, haddr); + set_page_huge_active(new_page); /* Make the old page be freed below */ new_page = old_page; } @@ -3729,6 +3729,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, pte_t new_pte; spinlock_t *ptl; unsigned long haddr = address & huge_page_mask(h); + bool new_page = false; /* * Currently, we are forced to kill the process in the event the @@ -3790,7 +3791,7 @@ retry: } clear_huge_page(page, address, pages_per_huge_page(h)); __SetPageUptodate(page); - set_page_huge_active(page); + new_page = true; if (vma->vm_flags & VM_MAYSHARE) { int err = huge_add_to_page_cache(page, mapping, idx); @@ -3861,6 +3862,15 @@ retry: } spin_unlock(ptl); + + /* + * Only make newly allocated pages active. Existing pages found + * in the pagecache could be !page_huge_active() if they have been + * isolated for migration. + */ + if (new_page) + set_page_huge_active(page); + unlock_page(page); out: return ret; @@ -4095,7 +4105,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, * the set_pte_at() write. */ __SetPageUptodate(page); - set_page_huge_active(page); mapping = dst_vma->vm_file->f_mapping; idx = vma_hugecache_offset(h, dst_vma, dst_addr); @@ -4163,6 +4172,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, update_mmu_cache(dst_vma, dst_addr, dst_pte); spin_unlock(ptl); + set_page_huge_active(page); if (vm_shared) unlock_page(page); ret = 0; @@ -4268,7 +4278,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } if (ret & VM_FAULT_RETRY) { - if (nonblocking) + if (nonblocking && + !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *nonblocking = 0; *nr_pages = 0; /* diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 0a14fcff70ed..5d1065efbd47 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,7 +5,10 @@ UBSAN_SANITIZE_generic.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n +CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_generic.o = -pg +CFLAGS_REMOVE_tags.o = -pg + # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 73c9cbfdedf4..09b534fbba17 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -361,10 +361,15 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object) * get different tags. */ static u8 assign_tag(struct kmem_cache *cache, const void *object, - bool init, bool krealloc) + bool init, bool keep_tag) { - /* Reuse the same tag for krealloc'ed objects. */ - if (krealloc) + /* + * 1. When an object is kmalloc()'ed, two hooks are called: + * kasan_slab_alloc() and kasan_kmalloc(). We assign the + * tag only in the first one. + * 2. We reuse the same tag for krealloc'ed objects. + */ + if (keep_tag) return get_tag(object); /* @@ -405,12 +410,6 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, return (void *)object; } -void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object, - gfp_t flags) -{ - return kasan_kmalloc(cache, object, cache->object_size, flags); -} - static inline bool shadow_invalid(u8 tag, s8 shadow_byte) { if (IS_ENABLED(CONFIG_KASAN_GENERIC)) @@ -467,7 +466,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) } static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, - size_t size, gfp_t flags, bool krealloc) + size_t size, gfp_t flags, bool keep_tag) { unsigned long redzone_start; unsigned long redzone_end; @@ -485,7 +484,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, KASAN_SHADOW_SCALE_SIZE); if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - tag = assign_tag(cache, object, false, krealloc); + tag = assign_tag(cache, object, false, keep_tag); /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */ kasan_unpoison_shadow(set_tag(object, tag), size); @@ -498,10 +497,16 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object, return set_tag(object, tag); } +void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object, + gfp_t flags) +{ + return __kasan_kmalloc(cache, object, cache->object_size, flags, false); +} + void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, gfp_t flags) { - return __kasan_kmalloc(cache, object, size, flags, false); + return __kasan_kmalloc(cache, object, size, flags, true); } EXPORT_SYMBOL(kasan_kmalloc); diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c index 0777649e07c4..63fca3172659 100644 --- a/mm/kasan/tags.c +++ b/mm/kasan/tags.c @@ -46,7 +46,7 @@ void kasan_init_tags(void) int cpu; for_each_possible_cpu(cpu) - per_cpu(prng_state, cpu) = get_random_u32(); + per_cpu(prng_state, cpu) = (u32)get_cycles(); } /* diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f9d9dc250428..707fa5579f66 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -574,6 +574,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, unsigned long flags; struct kmemleak_object *object, *parent; struct rb_node **link, *rb_parent; + unsigned long untagged_ptr; object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp)); if (!object) { @@ -619,8 +620,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, write_lock_irqsave(&kmemleak_lock, flags); - min_addr = min(min_addr, ptr); - max_addr = max(max_addr, ptr + size); + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); + min_addr = min(min_addr, untagged_ptr); + max_addr = max(max_addr, untagged_ptr + size); link = &object_tree_root.rb_node; rb_parent = NULL; while (*link) { @@ -1333,6 +1335,7 @@ static void scan_block(void *_start, void *_end, unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); unsigned long *end = _end - (BYTES_PER_POINTER - 1); unsigned long flags; + unsigned long untagged_ptr; read_lock_irqsave(&kmemleak_lock, flags); for (ptr = start; ptr < end; ptr++) { @@ -1347,7 +1350,8 @@ static void scan_block(void *_start, void *_end, pointer = *ptr; kasan_enable_current(); - if (pointer < min_addr || pointer >= max_addr) + untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer); + if (untagged_ptr < min_addr || untagged_ptr >= max_addr) continue; /* diff --git a/mm/maccess.c b/mm/maccess.c index f3416632e5a4..ec00be51a24f 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -30,10 +30,8 @@ long __probe_kernel_read(void *dst, const void *src, size_t size) set_fs(KERNEL_DS); pagefault_disable(); - current->kernel_uaccess_faults_ok++; ret = __copy_from_user_inatomic(dst, (__force const void __user *)src, size); - current->kernel_uaccess_faults_ok--; pagefault_enable(); set_fs(old_fs); @@ -60,9 +58,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) set_fs(KERNEL_DS); pagefault_disable(); - current->kernel_uaccess_faults_ok++; ret = __copy_to_user_inatomic((__force void __user *)dst, src, size); - current->kernel_uaccess_faults_ok--; pagefault_enable(); set_fs(old_fs); @@ -98,13 +94,11 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count) set_fs(KERNEL_DS); pagefault_disable(); - current->kernel_uaccess_faults_ok++; do { ret = __get_user(*dst++, (const char __user __force *)src++); } while (dst[-1] && ret == 0 && src - unsafe_addr < count); - current->kernel_uaccess_faults_ok--; dst[-1] = '\0'; pagefault_enable(); set_fs(old_fs); diff --git a/mm/memblock.c b/mm/memblock.c index 022d4cbb3618..ea31045ba704 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,6 +26,13 @@ #include "internal.h" +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_PHYSMEM_REGIONS 4 + +#ifndef INIT_MEMBLOCK_RESERVED_REGIONS +# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#endif + /** * DOC: memblock overview * @@ -92,7 +99,7 @@ unsigned long max_pfn; unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; #endif @@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = { .reserved.regions = memblock_reserved_init_regions, .reserved.cnt = 1, /* empty dummy entry */ - .reserved.max = INIT_MEMBLOCK_REGIONS, + .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 7c72f2a95785..831be5ff5f4d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, if (fail || tk->addr_valid == 0) { pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pfn, tk->tsk->comm, tk->tsk->pid); - force_sig(SIGKILL, tk->tsk); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, + tk->tsk, PIDTYPE_PID); } /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b9a667d36c55..1ad28323fb9f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1188,11 +1188,13 @@ static inline int pageblock_free(struct page *page) return PageBuddy(page) && page_order(page) >= pageblock_order; } -/* Return the start of the next active pageblock after a given page */ -static struct page *next_active_pageblock(struct page *page) +/* Return the pfn of the start of the next active pageblock after a given pfn */ +static unsigned long next_active_pageblock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); + /* Ensure the starting page is pageblock-aligned */ - BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); + BUG_ON(pfn & (pageblock_nr_pages - 1)); /* If the entire pageblock is free, move to the end of free page */ if (pageblock_free(page)) { @@ -1200,16 +1202,16 @@ static struct page *next_active_pageblock(struct page *page) /* be careful. we don't have locks, page_order can be changed.*/ order = page_order(page); if ((order < MAX_ORDER) && (order >= pageblock_order)) - return page + (1 << order); + return pfn + (1 << order); } - return page + pageblock_nr_pages; + return pfn + pageblock_nr_pages; } -static bool is_pageblock_removable_nolock(struct page *page) +static bool is_pageblock_removable_nolock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); struct zone *zone; - unsigned long pfn; /* * We have to be careful here because we are iterating over memory @@ -1232,12 +1234,14 @@ static bool is_pageblock_removable_nolock(struct page *page) /* Checks if this range of memory is likely to be hot-removable. */ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { - struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn, pfn; + + end_pfn = min(start_pfn + nr_pages, + zone_end_pfn(page_zone(pfn_to_page(start_pfn)))); /* Check the starting page of each pageblock within the range */ - for (; page < end_page; page = next_active_pageblock(page)) { - if (!is_pageblock_removable_nolock(page)) + for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) { + if (!is_pageblock_removable_nolock(pfn)) return false; cond_resched(); } @@ -1273,6 +1277,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; @@ -1301,23 +1308,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, static unsigned long scan_movable_pages(unsigned long start, unsigned long end) { unsigned long pfn; - struct page *page; + for (pfn = start; pfn < end; pfn++) { - if (pfn_valid(pfn)) { - page = pfn_to_page(pfn); - if (PageLRU(page)) - return pfn; - if (__PageMovable(page)) - return pfn; - if (PageHuge(page)) { - if (hugepage_migration_supported(page_hstate(page)) && - page_huge_active(page)) - return pfn; - else - pfn = round_up(pfn + 1, - 1 << compound_order(page)) - 1; - } - } + struct page *page, *head; + unsigned long skip; + + if (!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + if (PageLRU(page)) + return pfn; + if (__PageMovable(page)) + return pfn; + + if (!PageHuge(page)) + continue; + head = compound_head(page); + if (hugepage_migration_supported(page_hstate(head)) && + page_huge_active(head)) + return pfn; + skip = (1 << compound_order(head)) - (page - head); + pfn += skip - 1; } return 0; } @@ -1344,7 +1355,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; struct page *page; - int not_managed = 0; int ret = 0; LIST_HEAD(source); @@ -1392,7 +1402,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) else ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); if (!ret) { /* Success */ - put_page(page); list_add_tail(&page->lru, &source); if (!__PageMovable(page)) inc_node_page_state(page, NR_ISOLATED_ANON + @@ -1401,22 +1410,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) } else { pr_warn("failed to isolate pfn %lx\n", pfn); dump_page(page, "isolation failed"); - put_page(page); - /* Because we don't have big zone->lock. we should - check this again here. */ - if (page_count(page)) { - not_managed++; - ret = -EBUSY; - break; - } } + put_page(page); } if (!list_empty(&source)) { - if (not_managed) { - putback_movable_pages(&source); - goto out; - } - /* Allocate a new page from the nearest neighbor node */ ret = migrate_pages(&source, new_node_page, NULL, 0, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); @@ -1429,7 +1426,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) putback_movable_pages(&source); } } -out: + return ret; } @@ -1576,7 +1573,6 @@ static int __ref __offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) { - mem_hotplug_done(); ret = -EINVAL; reason = "multizone range"; goto failed_removal; @@ -1591,7 +1587,6 @@ static int __ref __offline_pages(unsigned long start_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); if (ret) { - mem_hotplug_done(); reason = "failure to isolate range"; goto failed_removal; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d4496d9d34f5..ee2bce59d2bf 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1314,7 +1314,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; - const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long); + unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); if (copy > nbytes) { if (copy > PAGE_SIZE) @@ -1491,7 +1491,7 @@ static int kernel_get_mempolicy(int __user *policy, int uninitialized_var(pval); nodemask_t nodes; - if (nmask != NULL && maxnode < MAX_NUMNODES) + if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; err = do_get_mempolicy(&pval, &nodes, addr, flags); @@ -1527,7 +1527,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); - nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); + nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) diff --git a/mm/migrate.c b/mm/migrate.c index a16b15090df3..181f5d2718a9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* Simple case, sync compaction */ if (mode != MIGRATE_ASYNC) { do { - get_bh(bh); lock_buffer(bh); bh = bh->b_this_page; @@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* async case, we cannot block on lock_buffer so use trylock_buffer */ do { - get_bh(bh); if (!trylock_buffer(bh)) { /* * We failed to lock the buffer and cannot stall in * async migration. Release the taken locks */ struct buffer_head *failed_bh = bh; - put_bh(failed_bh); bh = head; while (bh != failed_bh) { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } return false; @@ -818,7 +814,6 @@ unlock_buffers: bh = head; do { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } while (bh != head); @@ -1135,10 +1130,13 @@ out: * If migration is successful, decrease refcount of the newpage * which will not free the page because new page owner increased * refcounter. As well, if it is LRU page, add the page to LRU - * list in here. + * list in here. Use the old state of the isolated source page to + * determine if we migrated a LRU page. newpage was already unlocked + * and possibly modified by its owner - don't rely on the page + * state. */ if (rc == MIGRATEPAGE_SUCCESS) { - if (unlikely(__PageMovable(newpage))) + if (unlikely(!is_lru)) put_page(newpage); else putback_lru_page(newpage); @@ -1317,6 +1315,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, lock_page(hpage); } + /* + * Check for pages which are in the process of being freed. Without + * page_mapping() set, hugetlbfs specific move page routine will not + * be called and we could leak usage counts for subpools. + */ + if (page_private(hpage) && !page_mapping(hpage)) { + rc = -EBUSY; + goto out_unlock; + } + if (PageAnon(hpage)) anon_vma = page_get_anon_vma(hpage); @@ -1347,6 +1355,7 @@ put_anon: put_new_page = NULL; } +out_unlock: unlock_page(hpage); out: if (rc != -EAGAIN) diff --git a/mm/mincore.c b/mm/mincore.c index f0f91461a9f4..218099b5ed31 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -42,14 +42,72 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, return 0; } -static int mincore_unmapped_range(unsigned long addr, unsigned long end, - struct mm_walk *walk) +/* + * Later we can get more picky about what "in core" means precisely. + * For now, simply check to see if the page is in the page cache, + * and is up to date; i.e. that no page-in operation would be required + * at this time if an application were to map and access this page. + */ +static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) +{ + unsigned char present = 0; + struct page *page; + + /* + * When tmpfs swaps out a page from a file, any process mapping that + * file will not get a swp_entry_t in its pte, but rather it is like + * any other file mapping (ie. marked !present and faulted in with + * tmpfs's .fault). So swapped out tmpfs mappings are tested here. + */ +#ifdef CONFIG_SWAP + if (shmem_mapping(mapping)) { + page = find_get_entry(mapping, pgoff); + /* + * shmem/tmpfs may return swap: account for swapcache + * page too. + */ + if (xa_is_value(page)) { + swp_entry_t swp = radix_to_swp_entry(page); + page = find_get_page(swap_address_space(swp), + swp_offset(swp)); + } + } else + page = find_get_page(mapping, pgoff); +#else + page = find_get_page(mapping, pgoff); +#endif + if (page) { + present = PageUptodate(page); + put_page(page); + } + + return present; +} + +static int __mincore_unmapped_range(unsigned long addr, unsigned long end, + struct vm_area_struct *vma, unsigned char *vec) { - unsigned char *vec = walk->private; unsigned long nr = (end - addr) >> PAGE_SHIFT; + int i; - memset(vec, 0, nr); - walk->private += nr; + if (vma->vm_file) { + pgoff_t pgoff; + + pgoff = linear_page_index(vma, addr); + for (i = 0; i < nr; i++, pgoff++) + vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); + } else { + for (i = 0; i < nr; i++) + vec[i] = 0; + } + return nr; +} + +static int mincore_unmapped_range(unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + walk->private += __mincore_unmapped_range(addr, end, + walk->vma, walk->private); return 0; } @@ -69,9 +127,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, goto out; } - /* We'll consider a THP page under construction to be there */ if (pmd_trans_unstable(pmd)) { - memset(vec, 1, nr); + __mincore_unmapped_range(addr, end, vma, vec); goto out; } @@ -80,17 +137,28 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t pte = *ptep; if (pte_none(pte)) - *vec = 0; + __mincore_unmapped_range(addr, addr + PAGE_SIZE, + vma, vec); else if (pte_present(pte)) *vec = 1; else { /* pte is a swap entry */ swp_entry_t entry = pte_to_swp_entry(pte); - /* - * migration or hwpoison entries are always - * uptodate - */ - *vec = !!non_swap_entry(entry); + if (non_swap_entry(entry)) { + /* + * migration or hwpoison entries are always + * uptodate + */ + *vec = 1; + } else { +#ifdef CONFIG_SWAP + *vec = mincore_page(swap_address_space(entry), + swp_offset(entry)); +#else + WARN_ON(1); + *vec = 1; +#endif + } } vec++; } diff --git a/mm/mmap.c b/mm/mmap.c index f901065c4c64..fc1809b1bed6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2426,12 +2426,11 @@ int expand_downwards(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *prev; - int error; + int error = 0; address &= PAGE_MASK; - error = security_mmap_addr(address); - if (error) - return error; + if (address < mmap_min_addr) + return -EPERM; /* Enforce stack_guard_gap */ prev = vma->vm_prev; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f0e8cd9edb1a..26ea8636758f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -647,8 +647,8 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - /* tsk is already queued? */ - if (tsk == oom_reaper_list || tsk->oom_reaper_list) + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); @@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message) * still freeing memory. */ read_lock(&tasklist_lock); + + /* + * The task 'p' might have already exited before reaching here. The + * put_task_struct() will free task_struct 'p' while the loop still try + * to access the field of 'p', so, get an extra reference. + */ + get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) } } } + put_task_struct(p); read_unlock(&tasklist_lock); /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d295c9bc01a8..0b9f577b1a2a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2170,6 +2170,18 @@ static inline void boost_watermark(struct zone *zone) max_boost = mult_frac(zone->_watermark[WMARK_HIGH], watermark_boost_factor, 10000); + + /* + * high watermark may be uninitialised if fragmentation occurs + * very early in boot so do not boost. We do not fall + * through and boost by pageblock_nr_pages as failing + * allocations that early means that reclaim is not going + * to help and it may even be impossible to reclaim the + * boosted watermark resulting in a hang. + */ + if (!max_boost) + return; + max_boost = max(pageblock_nr_pages, max_boost); zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, @@ -4675,11 +4687,11 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size - 1); + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = size; } @@ -4695,10 +4707,10 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size); + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; } @@ -5701,18 +5713,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, cond_resched(); } } -#ifdef CONFIG_SPARSEMEM - /* - * If the zone does not span the rest of the section then - * we should at least initialize those pages. Otherwise we - * could blow up on a poisoned page in some paths which depend - * on full sections being initialized (e.g. memory hotplug). - */ - while (end_pfn % PAGES_PER_SECTION) { - __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); - end_pfn++; - } -#endif } #ifdef CONFIG_ZONE_DEVICE diff --git a/mm/page_ext.c b/mm/page_ext.c index ae44f7adbe07..8c78b8d45117 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -398,10 +398,8 @@ void __init page_ext_init(void) * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... - * - * Take into account DEFERRED_STRUCT_PAGE_INIT. */ - if (early_pfn_to_nid(pfn) != nid) + if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; diff --git a/mm/shmem.c b/mm/shmem.c index 6ece1e2fe76e..2c012eee133d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2848,16 +2848,20 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); - int ret; + int ret = 0; /* * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. + * But if an O_TMPFILE file is linked into the tmpfs, the + * first link must skip that, to get the accounting right. */ - ret = shmem_reserve_inode(inode->i_sb); - if (ret) - goto out; + if (inode->i_nlink) { + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; + } dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); diff --git a/mm/slab.c b/mm/slab.c index 78eb8c5bf4e4..91c1863df93d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2359,7 +2359,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, void *freelist; void *addr = page_address(page); - page->s_mem = kasan_reset_tag(addr) + colour_off; + page->s_mem = addr + colour_off; page->active = 0; if (OBJFREELIST_SLAB(cachep)) @@ -2368,6 +2368,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); + freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { @@ -2681,6 +2682,13 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, offset *= cachep->colour_off; + /* + * Call kasan_poison_slab() before calling alloc_slabmgmt(), so + * page_address() in the latter returns a non-tagged pointer, + * as it should be for slab pages. + */ + kasan_poison_slab(page); + /* Get slab management. */ freelist = alloc_slabmgmt(cachep, page, offset, local_flags & ~GFP_CONSTRAINT_MASK, page_node); @@ -2689,7 +2697,6 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep, slab_map_pages(cachep, page, freelist); - kasan_poison_slab(page); cache_init_objs(cachep, page); if (gfpflags_allow_blocking(local_flags)) @@ -3540,7 +3547,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *ret = slab_alloc(cachep, flags, _RET_IP_); - ret = kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc(_RET_IP_, ret, cachep->object_size, cachep->size, flags); @@ -3630,7 +3636,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); - ret = kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep->object_size, cachep->size, flags, nodeid); @@ -4408,6 +4413,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, unsigned int objnr; unsigned long offset; + ptr = kasan_reset_tag(ptr); + /* Find and validate object. */ cachep = page->slab_cache; objnr = obj_to_index(cachep, page, (void *)ptr); diff --git a/mm/slab.h b/mm/slab.h index 4190c24ef0e9..384105318779 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -437,11 +437,10 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, flags &= gfp_allowed_mask; for (i = 0; i < size; i++) { - void *object = p[i]; - - kmemleak_alloc_recursive(object, s->object_size, 1, + p[i] = kasan_slab_alloc(s, p[i], flags); + /* As p[i] might get tagged, call kmemleak hook after KASAN. */ + kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, flags); - p[i] = kasan_slab_alloc(s, object, flags); } if (memcg_kmem_enabled()) diff --git a/mm/slab_common.c b/mm/slab_common.c index 81732d05e74a..f9d89c1b5977 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1228,8 +1228,9 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) flags |= __GFP_COMP; page = alloc_pages(flags, order); ret = page ? page_address(page) : NULL; - kmemleak_alloc(ret, size, 1, flags); ret = kasan_kmalloc_large(ret, size, flags); + /* As ret might get tagged, call kmemleak hook after KASAN. */ + kmemleak_alloc(ret, size, 1, flags); return ret; } EXPORT_SYMBOL(kmalloc_order); diff --git a/mm/slub.c b/mm/slub.c index 1e3d0ec4e200..dc777761b6b7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -249,7 +249,18 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, unsigned long ptr_addr) { #ifdef CONFIG_SLAB_FREELIST_HARDENED - return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr); + /* + * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. + * Normally, this doesn't cause any issues, as both set_freepointer() + * and get_freepointer() are called with a pointer with the same tag. + * However, there are some issues with CONFIG_SLUB_DEBUG code. For + * example, when __free_slub() iterates over objects in a cache, it + * passes untagged pointers to check_object(). check_object() in turns + * calls get_freepointer() with an untagged pointer, which causes the + * freepointer to be restored incorrectly. + */ + return (void *)((unsigned long)ptr ^ s->random ^ + (unsigned long)kasan_reset_tag((void *)ptr_addr)); #else return ptr; #endif @@ -303,15 +314,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) __p < (__addr) + (__objects) * (__s)->size; \ __p += (__s)->size) -#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = fixup_red_left(__s, __addr), __idx = 1; \ - __idx <= __objects; \ - __p += (__s)->size, __idx++) - /* Determine object index from a given position */ static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) { - return (p - addr) / s->size; + return (kasan_reset_tag(p) - addr) / s->size; } static inline unsigned int order_objects(unsigned int order, unsigned int size) @@ -507,6 +513,7 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; base = page_address(page); + object = kasan_reset_tag(object); object = restore_red_left(s, object); if (object < base || object >= base + page->objects * s->size || (object - base) % s->size) { @@ -1075,6 +1082,16 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, init_tracking(s, object); } +static void setup_page_debug(struct kmem_cache *s, void *addr, int order) +{ + if (!(s->flags & SLAB_POISON)) + return; + + metadata_access_enable(); + memset(addr, POISON_INUSE, PAGE_SIZE << order); + metadata_access_disable(); +} + static inline int alloc_consistency_checks(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) @@ -1330,6 +1347,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) {} +static inline void setup_page_debug(struct kmem_cache *s, + void *addr, int order) {} static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } @@ -1374,8 +1393,10 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, */ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { + ptr = kasan_kmalloc_large(ptr, size, flags); + /* As ptr might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ptr, size, 1, flags); - return kasan_kmalloc_large(ptr, size, flags); + return ptr; } static __always_inline void kfree_hook(void *x) @@ -1641,27 +1662,25 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); + kasan_poison_slab(page); + start = page_address(page); - if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << order); - - kasan_poison_slab(page); + setup_page_debug(s, start, order); shuffle = shuffle_freelist(s, page); if (!shuffle) { - for_each_object_idx(p, idx, s, start, page->objects) { - if (likely(idx < page->objects)) { - next = p + s->size; - next = setup_object(s, page, next); - set_freepointer(s, p, next); - } else - set_freepointer(s, p, NULL); - } start = fixup_red_left(s, start); start = setup_object(s, page, start); page->freelist = start; + for (idx = 0, p = start; idx < page->objects - 1; idx++) { + next = p + s->size; + next = setup_object(s, page, next); + set_freepointer(s, p, next); + p = next; + } + set_freepointer(s, p, NULL); } page->inuse = page->objects; diff --git a/mm/swap.c b/mm/swap.c index 4929bc1be60e..4d7d37eb3c40 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -320,11 +320,6 @@ static inline void activate_page_drain(int cpu) { } -static bool need_activate_page_drain(int cpu) -{ - return false; -} - void activate_page(struct page *page) { struct zone *zone = page_zone(page); @@ -653,13 +648,15 @@ void lru_add_drain(void) put_cpu(); } +#ifdef CONFIG_SMP + +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); + static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_drain(); } -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); - /* * Doesn't need any cpu hotplug locking because we do rely on per-cpu * kworkers being shut down before our page_alloc_cpu_dead callback is @@ -702,6 +699,12 @@ void lru_add_drain_all(void) mutex_unlock(&lock); } +#else +void lru_add_drain_all(void) +{ + lru_add_drain(); +} +#endif /** * release_pages - batched put_page() diff --git a/mm/util.c b/mm/util.c index 1ea055138043..379319b1bcfd 100644 --- a/mm/util.c +++ b/mm/util.c @@ -150,7 +150,7 @@ void *memdup_user(const void __user *src, size_t len) { void *p; - p = kmalloc_track_caller(len, GFP_USER); + p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); diff --git a/mm/vmscan.c b/mm/vmscan.c index a714c4f800e9..e979705bbf32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, delta = freeable / 2; } - /* - * Make sure we apply some minimal pressure on default priority - * even on small cgroups. Stale objects are not only consuming memory - * by themselves, but can also hold a reference to a dying cgroup, - * preventing it from being reclaimed. A dying cgroup with all - * corresponding structures like per-cpu stats and kmem caches - * can be really big, so it may lead to a significant waste of memory. - */ - delta = max_t(unsigned long long, delta, min(freeable, batch_size)); - total_scan += delta; if (total_scan < 0) { pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", |