Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c      | 19
-rw-r--r-- | mm/gup.c              | 29
-rw-r--r-- | mm/huge_memory.c      |  7
-rw-r--r-- | mm/kasan/quarantine.c |  7
-rw-r--r-- | mm/kfence/core.c      | 10
-rw-r--r-- | mm/memory-failure.c   | 15
-rw-r--r-- | mm/mremap.c           |  2
-rw-r--r-- | mm/nommu.c            |  2
-rw-r--r-- | mm/page_io.c          |  4
-rw-r--r-- | mm/readahead.c        | 16
-rw-r--r-- | mm/swapfile.c         | 32
-rw-r--r-- | mm/usercopy.c         | 91
-rw-r--r-- | mm/util.c             | 32
13 files changed, 142 insertions, 124 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7176af65b103..ff60bd7d74e0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/blkdev.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
 #include <linux/kthread.h>
@@ -390,7 +391,6 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
         struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                 release_work);
-        struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
         struct backing_dev_info *bdi = wb->bdi;
 
         mutex_lock(&wb->bdi->cgwb_release_mutex);
@@ -401,7 +401,7 @@ static void cgwb_release_workfn(struct work_struct *work)
         mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
         /* triggers blkg destruction if no online users left */
-        blkcg_unpin_online(blkcg);
+        blkcg_unpin_online(wb->blkcg_css);
 
         fprop_local_destroy_percpu(&wb->memcg_completions);
 
@@ -446,7 +446,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
 {
         struct mem_cgroup *memcg;
         struct cgroup_subsys_state *blkcg_css;
-        struct blkcg *blkcg;
         struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
         struct bdi_writeback *wb;
         unsigned long flags;
@@ -454,9 +453,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
         memcg = mem_cgroup_from_css(memcg_css);
         blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
-        blkcg = css_to_blkcg(blkcg_css);
         memcg_cgwb_list = &memcg->cgwb_list;
-        blkcg_cgwb_list = &blkcg->cgwb_list;
+        blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css);
 
         /* look up again under lock and discard on blkcg mismatch */
         spin_lock_irqsave(&cgwb_lock, flags);
@@ -511,7 +509,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
                         list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
                         list_add(&wb->memcg_node, memcg_cgwb_list);
                         list_add(&wb->blkcg_node, blkcg_cgwb_list);
-                        blkcg_pin_online(blkcg);
+                        blkcg_pin_online(blkcg_css);
                         css_get(memcg_css);
                         css_get(blkcg_css);
                 }
@@ -724,18 +722,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
 
 /**
  * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
- * @blkcg: blkcg being offlined
+ * @css: blkcg being offlined
  *
  * Also prevents creation of any new wb's associated with @blkcg.
  */
-void wb_blkcg_offline(struct blkcg *blkcg)
+void wb_blkcg_offline(struct cgroup_subsys_state *css)
 {
         struct bdi_writeback *wb, *next;
+        struct list_head *list = blkcg_get_cgwb_list(css);
 
         spin_lock_irq(&cgwb_lock);
-        list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
+        list_for_each_entry_safe(wb, next, list, blkcg_node)
                 cgwb_kill(wb);
-        blkcg->cgwb_list.next = NULL;   /* prevent new wb's */
+        list->next = NULL;   /* prevent new wb's */
         spin_unlock_irq(&cgwb_lock);
 }
 
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1648,6 +1648,35 @@ out:
 }
 EXPORT_SYMBOL(fault_in_writeable);
 
+/**
+ * fault_in_subpage_writeable - fault in an address range for writing
+ * @uaddr: start of address range
+ * @size: size of address range
+ *
+ * Fault in a user address range for writing while checking for permissions at
+ * sub-page granularity (e.g. arm64 MTE). This function should be used when
+ * the caller cannot guarantee forward progress of a copy_to_user() loop.
+ *
+ * Returns the number of bytes not faulted in (like copy_to_user() and
+ * copy_from_user()).
+ */
+size_t fault_in_subpage_writeable(char __user *uaddr, size_t size)
+{
+        size_t faulted_in;
+
+        /*
+         * Attempt faulting in at page granularity first for page table
+         * permission checking. The arch-specific probe_subpage_writeable()
+         * functions may not check for this.
+         */
+        faulted_in = size - fault_in_writeable(uaddr, size);
+        if (faulted_in)
+                faulted_in -= probe_subpage_writeable(uaddr, faulted_in);
+
+        return size - faulted_in;
+}
+EXPORT_SYMBOL(fault_in_subpage_writeable);
+
 /*
  * fault_in_safe_writeable - fault in an address range for writing
  * @uaddr: start of address range
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c468fee595ff..910a138e9859 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2495,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
         struct address_space *mapping = NULL;
         int extra_pins, ret;
         pgoff_t end;
+        bool is_hzp;
 
-        VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
         VM_BUG_ON_PAGE(!PageLocked(head), head);
         VM_BUG_ON_PAGE(!PageCompound(head), head);
 
+        is_hzp = is_huge_zero_page(head);
+        VM_WARN_ON_ONCE_PAGE(is_hzp, head);
+        if (is_hzp)
+                return -EBUSY;
+
         if (PageWriteback(head))
                 return -EBUSY;
 
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 08291ed33e93..0a9def8ce5e8 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -315,6 +315,13 @@ static void per_cpu_remove_cache(void *arg)
         struct qlist_head *q;
 
         q = this_cpu_ptr(&cpu_quarantine);
+        /*
+         * Ensure the ordering between the writing to q->offline and
+         * per_cpu_remove_cache. Prevent cpu_quarantine from being corrupted
+         * by interrupt.
+         */
+        if (READ_ONCE(q->offline))
+                return;
         qlist_move_cache(q, &to_free, cache);
         qlist_free_all(&to_free, cache);
 }
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 9b2b5f56f4ae..11a954763be9 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -621,6 +621,16 @@ static bool __init kfence_init_pool_early(void)
          * fails for the first page, and therefore expect addr==__kfence_pool in
          * most failure cases.
          */
+        for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
+                struct slab *slab = virt_to_slab(p);
+
+                if (!slab)
+                        continue;
+#ifdef CONFIG_MEMCG
+                slab->memcg_data = 0;
+#endif
+                __folio_clear_slab(slab_folio(slab));
+        }
         memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
         __kfence_pool = NULL;
         return false;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 27760c19bad7..d4a4adcca01f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1274,7 +1274,7 @@ try_again:
         }
 out:
         if (ret == -EIO)
-                dump_page(p, "hwpoison: unhandlable page");
+                pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p));
 
         return ret;
 }
@@ -1861,19 +1861,6 @@ try_again:
 
         if (PageTransHuge(hpage)) {
                 /*
-                 * Bail out before SetPageHasHWPoisoned() if hpage is
-                 * huge_zero_page, although PG_has_hwpoisoned is not
-                 * checked in set_huge_zero_page().
-                 *
-                 * TODO: Handle memory failure of huge_zero_page thoroughly.
-                 */
-                if (is_huge_zero_page(hpage)) {
-                        action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-                        res = -EBUSY;
-                        goto unlock_mutex;
-                }
-
-                /*
                  * The flag must be set after the refcount is bumped
                  * otherwise it may race with THP split.
                  * And the flag can't be set in get_hwpoison_page() since
diff --git a/mm/mremap.c b/mm/mremap.c
index 303d3290b938..0b93fac76851 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -947,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
                 return -EINTR;
         vma = vma_lookup(mm, addr);
         if (!vma) {
-                ret = EFAULT;
+                ret = -EFAULT;
                 goto out;
         }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 55a9e48a7a02..9d7afc2d959e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -226,6 +226,8 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc);
+
 /*
  * vzalloc - allocate virtually contiguous memory with zero fill
  *
diff --git a/mm/page_io.c b/mm/page_io.c
index 89fbf3cae30f..3fbdab6a940e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous)
          * attempt to access it in the page fault retry time check.
          */
         if (synchronous) {
-                bio->bi_opf |= REQ_POLLED;
                 get_task_struct(current);
                 bio->bi_private = current;
         }
@@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
                 if (!READ_ONCE(bio->bi_private))
                         break;
 
-                if (!bio_poll(bio, NULL, 0))
-                        blk_io_schedule();
+                blk_io_schedule();
         }
         __set_current_state(TASK_RUNNING);
         bio_put(bio);
diff --git a/mm/readahead.c b/mm/readahead.c
index 8e3775829513..26bf74a6b2fe 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -113,6 +113,7 @@
  * ->readpage() which may be less efficient.
  */
 
+#include <linux/blkdev.h>
 #include <linux/kernel.h>
 #include <linux/dax.h>
 #include <linux/gfp.h>
@@ -474,7 +475,8 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 
         if (!folio)
                 return -ENOMEM;
-        if (mark - index < (1UL << order))
+        mark = round_up(mark, 1UL << order);
+        if (index == mark)
                 folio_set_readahead(folio);
         err = filemap_add_folio(ractl->mapping, folio, index, gfp);
         if (err)
@@ -555,8 +557,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
         struct file_ra_state *ra = ractl->ra;
         unsigned long max_pages = ra->ra_pages;
         unsigned long add_pages;
-        unsigned long index = readahead_index(ractl);
-        pgoff_t prev_index;
+        pgoff_t index = readahead_index(ractl);
+        pgoff_t expected, prev_index;
+        unsigned int order = folio ? folio_order(folio) : 0;
 
         /*
          * If the request exceeds the readahead window, allow the read to
@@ -575,8 +578,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
          * It's the expected callback index, assume sequential access.
          * Ramp up sizes, and push forward the readahead window.
          */
-        if ((index == (ra->start + ra->size - ra->async_size) ||
-             index == (ra->start + ra->size))) {
+        expected = round_up(ra->start + ra->size - ra->async_size,
+                        1UL << order);
+        if (index == expected || index == (ra->start + ra->size)) {
                 ra->start += ra->size;
                 ra->size = get_next_ra_size(ra, max_pages);
                 ra->async_size = ra->size;
@@ -662,7 +666,7 @@ readit:
         }
 
         ractl->_index = ra->start;
-        page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
+        page_cache_ra_order(ractl, ra, order);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63c61f8b2611..981a6e85c88e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -6,6 +6,7 @@
  * Swap reorganised 29.12.95, Stephen Tweedie
  */
 
+#include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -179,7 +180,7 @@ static int discard_swap(struct swap_info_struct *si)
                 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
                 if (nr_blocks) {
                         err = blkdev_issue_discard(si->bdev, start_block,
-                                        nr_blocks, GFP_KERNEL, 0);
+                                        nr_blocks, GFP_KERNEL);
                         if (err)
                                 return err;
                         cond_resched();
@@ -190,7 +191,7 @@ static int discard_swap(struct swap_info_struct *si)
                 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
                 err = blkdev_issue_discard(si->bdev, start_block,
-                                nr_blocks, GFP_KERNEL, 0);
+                                nr_blocks, GFP_KERNEL);
                 if (err)
                         break;
 
@@ -254,7 +255,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                 start_block <<= PAGE_SHIFT - 9;
                 nr_blocks <<= PAGE_SHIFT - 9;
                 if (blkdev_issue_discard(si->bdev, start_block,
-                                        nr_blocks, GFP_NOIO, 0))
+                                        nr_blocks, GFP_NOIO))
                         break;
 
                 se = next_se(se);
@@ -2466,7 +2467,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
         if (p->flags & SWP_CONTINUED)
                 free_swap_count_continuations(p);
 
-        if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+        if (!p->bdev || !bdev_nonrot(p->bdev))
                 atomic_dec(&nr_rotate_swap);
 
         mutex_lock(&swapon_mutex);
@@ -2761,7 +2762,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
                  * write only restriction. Hence zoned block devices are not
                  * suitable for swapping. Disallow them here.
                  */
-                if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
+                if (bdev_is_zoned(p->bdev))
                         return -EINVAL;
                 p->flags |= SWP_BLKDEV;
         } else if (S_ISREG(inode->i_mode)) {
@@ -2957,20 +2958,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
         return nr_extents;
 }
 
-/*
- * Helper to sys_swapon determining if a given swap
- * backing device queue supports DISCARD operations.
- */
-static bool swap_discardable(struct swap_info_struct *si)
-{
-        struct request_queue *q = bdev_get_queue(si->bdev);
-
-        if (!blk_queue_discard(q))
-                return false;
-
-        return true;
-}
-
 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
         struct swap_info_struct *p;
@@ -3065,13 +3052,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                 goto bad_swap_unlock_inode;
         }
 
-        if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
+        if (p->bdev && bdev_stable_writes(p->bdev))
                 p->flags |= SWP_STABLE_WRITES;
 
         if (p->bdev && p->bdev->bd_disk->fops->rw_page)
                 p->flags |= SWP_SYNCHRONOUS_IO;
 
-        if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+        if (p->bdev && bdev_nonrot(p->bdev)) {
                 int cpu;
                 unsigned long ci, nr_cluster;
 
@@ -3132,7 +3119,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                                         sizeof(long),
                                         GFP_KERNEL);
 
-        if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+        if ((swap_flags & SWAP_FLAG_DISCARD) &&
+            p->bdev && bdev_max_discard_sectors(p->bdev)) {
                 /*
                  * When discard is enabled for swap with no particular
                  * policy flagged, we set all swap discard flags here in
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 2c235d5c2364..baeacc735b83 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -17,6 +17,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
+#include <linux/vmalloc.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
 #include <asm/sections.h>
@@ -157,91 +158,47 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
                 usercopy_abort("null address", NULL, to_user, ptr, n);
 }
 
-/* Checks for allocs that are marked in some way as spanning multiple pages. */
-static inline void check_page_span(const void *ptr, unsigned long n,
-                                   struct page *page, bool to_user)
+static inline void check_heap_object(const void *ptr, unsigned long n,
+                                     bool to_user)
 {
-#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
-        const void *end = ptr + n - 1;
-        struct page *endpage;
-        bool is_reserved, is_cma;
+        struct folio *folio;
 
-        /*
-         * Sometimes the kernel data regions are not marked Reserved (see
-         * check below). And sometimes [_sdata,_edata) does not cover
-         * rodata and/or bss, so check each range explicitly.
-         */
+        if (is_kmap_addr(ptr)) {
+                unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);
 
-        /* Allow reads of kernel rodata region (if not marked as Reserved). */
-        if (ptr >= (const void *)__start_rodata &&
-            end <= (const void *)__end_rodata) {
-                if (!to_user)
-                        usercopy_abort("rodata", NULL, to_user, 0, n);
+                if ((unsigned long)ptr + n - 1 > page_end)
+                        usercopy_abort("kmap", NULL, to_user,
+                                       offset_in_page(ptr), n);
                 return;
         }
 
-        /* Allow kernel data region (if not marked as Reserved). */
-        if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
-                return;
+        if (is_vmalloc_addr(ptr)) {
+                struct vm_struct *area = find_vm_area(ptr);
+                unsigned long offset;
 
-        /* Allow kernel bss region (if not marked as Reserved). */
-        if (ptr >= (const void *)__bss_start &&
-            end <= (const void *)__bss_stop)
-                return;
-
-        /* Is the object wholly within one base page? */
-        if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
-                   ((unsigned long)end & (unsigned long)PAGE_MASK)))
-                return;
+                if (!area) {
+                        usercopy_abort("vmalloc", "no area", to_user, 0, n);
+                        return;
+                }
 
-        /* Allow if fully inside the same compound (__GFP_COMP) page. */
-        endpage = virt_to_head_page(end);
-        if (likely(endpage == page))
+                offset = ptr - area->addr;
+                if (offset + n > get_vm_area_size(area))
+                        usercopy_abort("vmalloc", NULL, to_user, offset, n);
                 return;
-
-        /*
-         * Reject if range is entirely either Reserved (i.e. special or
-         * device memory), or CMA. Otherwise, reject since the object spans
-         * several independently allocated pages.
-         */
-        is_reserved = PageReserved(page);
-        is_cma = is_migrate_cma_page(page);
-        if (!is_reserved && !is_cma)
-                usercopy_abort("spans multiple pages", NULL, to_user, 0, n);
-
-        for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
-                page = virt_to_head_page(ptr);
-                if (is_reserved && !PageReserved(page))
-                        usercopy_abort("spans Reserved and non-Reserved pages",
-                                       NULL, to_user, 0, n);
-                if (is_cma && !is_migrate_cma_page(page))
-                        usercopy_abort("spans CMA and non-CMA pages", NULL,
-                                       to_user, 0, n);
         }
-#endif
-}
-
-static inline void check_heap_object(const void *ptr, unsigned long n,
-                                     bool to_user)
-{
-        struct folio *folio;
 
         if (!virt_addr_valid(ptr))
                 return;
 
-        /*
-         * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
-         * highmem page or fallback to virt_to_page(). The following
-         * is effectively a highmem-aware virt_to_slab().
-         */
-        folio = page_folio(kmap_to_page((void *)ptr));
+        folio = virt_to_folio(ptr);
 
         if (folio_test_slab(folio)) {
                 /* Check slab allocator for flags and size. */
                 __check_heap_object(ptr, n, folio_slab(folio), to_user);
-        } else {
-                /* Verify object does not incorrectly span multiple pages. */
-                check_page_span(ptr, n, folio_page(folio, 0), to_user);
+        } else if (folio_test_large(folio)) {
+                unsigned long offset = ptr - folio_address(folio);
+                if (offset + n > folio_size(folio))
+                        usercopy_abort("page alloc", NULL, to_user, offset, n);
         }
 }
diff --git a/mm/util.c b/mm/util.c
index 3492a9e81aa3..ac63e5ca8b21 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -343,6 +343,38 @@ unsigned long randomize_stack_top(unsigned long stack_top)
 #endif
 }
 
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start:	The smallest acceptable address the caller will take.
+ * @range:	The size of the area, starting at @start, within which the
+ *		random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned. We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range). On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+        if (!PAGE_ALIGNED(start)) {
+                range -= PAGE_ALIGN(start) - start;
+                start = PAGE_ALIGN(start);
+        }
+
+        if (start > ULONG_MAX - range)
+                range = ULONG_MAX - start;
+
+        range >>= PAGE_SHIFT;
+
+        if (range == 0)
+                return start;
+
+        return start + (get_random_long() % range << PAGE_SHIFT);
+}
+
 #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {