author | Bartosz Golaszewski <bartosz.golaszewski@linaro.org> | 2024-04-02 10:49:10 +0200
---|---|---
committer | Bartosz Golaszewski <bartosz.golaszewski@linaro.org> | 2024-04-02 10:49:10 +0200
commit | 9164d6758af238db10f084930fb22ffe63ef3d5a (patch) |
tree | 6e7429e5a976c913f56f15bb6c715e936347f912 /mm |
parent | f752a52d34cbdcb288ae01ace6b66baa2bbb547f (diff) |
parent | 39cd87c4eb2b893354f3b850f916353f2658ae6f (diff) |
Merge tag 'v6.9-rc2' into gpio/for-next
Linux 6.9-rc2
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile      |  3
-rw-r--r-- | mm/filemap.c     | 16
-rw-r--r-- | mm/gup.c         | 14
-rw-r--r-- | mm/memory.c      |  4
-rw-r--r-- | mm/page_owner.c  | 33
-rw-r--r-- | mm/shmem_quota.c | 10
-rw-r--r-- | mm/userfaultfd.c |  3
-rw-r--r-- | mm/zswap.c       | 45
8 files changed, 99 insertions, 29 deletions
```diff
diff --git a/mm/Makefile b/mm/Makefile
index e4b5b75aaec9..4abb40b911ec 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,8 +29,7 @@ KCOV_INSTRUMENT_mmzone.o := n
 KCOV_INSTRUMENT_vmstat.o := n
 KCOV_INSTRUMENT_failslab.o := n
 
-CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
-CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+CFLAGS_init-mm.o += -Wno-override-init
 
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
diff --git a/mm/filemap.c b/mm/filemap.c
index 7437b2bd75c1..30de18c4fd28 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4197,7 +4197,23 @@ static void filemap_cachestat(struct address_space *mapping,
 			/* shmem file - in swap cache */
 			swp_entry_t swp = radix_to_swp_entry(folio);
 
+			/* swapin error results in poisoned entry */
+			if (non_swap_entry(swp))
+				goto resched;
+
+			/*
+			 * Getting a swap entry from the shmem
+			 * inode means we beat
+			 * shmem_unuse(). rcu_read_lock()
+			 * ensures swapoff waits for us before
+			 * freeing the swapper space. However,
+			 * we can race with swapping and
+			 * invalidation, so there might not be
+			 * a shadow in the swapcache (yet).
+			 */
 			shadow = get_shadow_from_swap_cache(swp);
+			if (!shadow)
+				goto resched;
 		}
 #endif
 		if (workingset_test_recent(shadow, true, &workingset))
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1653,20 +1653,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_LOCKONFAULT)
 		return nr_pages;
 
+	/* ... similarly, we've never faulted in PROT_NONE pages */
+	if (!vma_is_accessible(vma))
+		return -EFAULT;
+
 	gup_flags = FOLL_TOUCH;
 	/*
 	 * We want to touch writable mappings with a write fault in order
 	 * to break COW, except for shared mappings because these don't COW
 	 * and we would not want to dirty them for nothing.
+	 *
+	 * Otherwise, do a read fault, and use FOLL_FORCE in case it's not
+	 * readable (ie write-only or executable).
 	 */
 	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
 		gup_flags |= FOLL_WRITE;
-
-	/*
-	 * We want mlock to succeed for regions that have any permissions
-	 * other than PROT_NONE.
-	 */
-	if (vma_is_accessible(vma))
+	else
 		gup_flags |= FOLL_FORCE;
 
 	if (locked)
diff --git a/mm/memory.c b/mm/memory.c
index f2bc6dd15eb8..904f70b99498 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1536,7 +1536,9 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
 	ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 	arch_check_zapped_pte(vma, ptent);
 	tlb_remove_tlb_entry(tlb, pte, addr);
-	VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+	if (userfaultfd_pte_wp(vma, ptent))
+		zap_install_uffd_wp_if_needed(vma, addr, pte, 1,
+					      details, ptent);
 	ksm_might_unmap_zero_page(mm, ptent);
 	return 1;
 }
diff --git a/mm/page_owner.c b/mm/page_owner.c
index e7139952ffd9..d17d1351ec84 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -54,6 +54,22 @@ static depot_stack_handle_t early_handle;
 
 static void init_early_allocated_pages(void);
 
+static inline void set_current_in_page_owner(void)
+{
+	/*
+	 * Avoid recursion.
+	 *
+	 * We might need to allocate more memory from page_owner code, so make
+	 * sure to signal it in order to avoid recursion.
+	 */
+	current->in_page_owner = 1;
+}
+
+static inline void unset_current_in_page_owner(void)
+{
+	current->in_page_owner = 0;
+}
+
 static int __init early_page_owner_param(char *buf)
 {
 	int ret = kstrtobool(buf, &page_owner_enabled);
@@ -133,23 +149,16 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
 	depot_stack_handle_t handle;
 	unsigned int nr_entries;
 
-	/*
-	 * Avoid recursion.
-	 *
-	 * Sometimes page metadata allocation tracking requires more
-	 * memory to be allocated:
-	 * - when new stack trace is saved to stack depot
-	 */
 	if (current->in_page_owner)
 		return dummy_handle;
-	current->in_page_owner = 1;
+	set_current_in_page_owner();
 
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
 	handle = stack_depot_save(entries, nr_entries, flags);
 	if (!handle)
 		handle = failure_handle;
+	unset_current_in_page_owner();
 
-	current->in_page_owner = 0;
 	return handle;
 }
 
@@ -164,9 +173,13 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
 	gfp_mask &= (GFP_ATOMIC | GFP_KERNEL);
 	gfp_mask |= __GFP_NOWARN;
 
+	set_current_in_page_owner();
 	stack = kmalloc(sizeof(*stack), gfp_mask);
-	if (!stack)
+	if (!stack) {
+		unset_current_in_page_owner();
 		return;
+	}
+	unset_current_in_page_owner();
 
 	stack->stack_record = stack_record;
 	stack->next = NULL;
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
index 062d1c1097ae..ce514e700d2f 100644
--- a/mm/shmem_quota.c
+++ b/mm/shmem_quota.c
@@ -116,7 +116,7 @@ static int shmem_free_file_info(struct super_block *sb, int type)
 static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
 {
 	struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
-	struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+	struct rb_node *node;
 	qid_t id = from_kqid(&init_user_ns, *qid);
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct quota_id *entry = NULL;
@@ -126,6 +126,7 @@ static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
 		return -ESRCH;
 
 	down_read(&dqopt->dqio_sem);
+	node = ((struct rb_root *)info->dqi_priv)->rb_node;
 	while (node) {
 		entry = rb_entry(node, struct quota_id, node);
 
@@ -165,7 +166,7 @@ out_unlock:
 static int shmem_acquire_dquot(struct dquot *dquot)
 {
 	struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
-	struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+	struct rb_node **n;
 	struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
 	struct rb_node *parent = NULL, *new_node = NULL;
 	struct quota_id *new_entry, *entry;
@@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dquot *dquot)
 
 	mutex_lock(&dquot->dq_lock);
 	down_write(&dqopt->dqio_sem);
+	n = &((struct rb_root *)info->dqi_priv)->rb_node;
+
 	while (*n) {
 		parent = *n;
 		entry = rb_entry(parent, struct quota_id, node);
@@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct dquot *dquot)
 static int shmem_release_dquot(struct dquot *dquot)
 {
 	struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
-	struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+	struct rb_node *node;
 	qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 	struct quota_id *entry = NULL;
@@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dquot *dquot)
 		goto out_dqlock;
 
 	down_write(&dqopt->dqio_sem);
+	node = ((struct rb_root *)info->dqi_priv)->rb_node;
 	while (node) {
 		entry = rb_entry(node, struct quota_id, node);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 712160cd41ec..3c3539c573e7 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1444,7 +1444,8 @@ static int uffd_move_lock(struct mm_struct *mm,
 		 */
 		down_read(&(*dst_vmap)->vm_lock->lock);
 		if (*dst_vmap != *src_vmap)
-			down_read(&(*src_vmap)->vm_lock->lock);
+			down_read_nested(&(*src_vmap)->vm_lock->lock,
+					 SINGLE_DEPTH_NESTING);
 	}
 	mmap_read_unlock(mm);
 	return err;
diff --git a/mm/zswap.c b/mm/zswap.c
index 9dec853647c8..caed028945b0 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1080,7 +1080,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
 
 	mutex_lock(&acomp_ctx->mutex);
 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
-	if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) {
+	/*
+	 * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
+	 * to do crypto_acomp_decompress() which might sleep. In such cases, we must
+	 * resort to copying the buffer to a temporary one.
+	 * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer,
+	 * such as a kmap address of high memory or even ever a vmap address.
+	 * However, sg_init_one is only equipped to handle linearly mapped low memory.
+	 * In such cases, we also must copy the buffer to a temporary and lowmem one.
+	 */
+	if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) ||
+	    !virt_addr_valid(src)) {
 		memcpy(acomp_ctx->buffer, src, entry->length);
 		src = acomp_ctx->buffer;
 		zpool_unmap_handle(zpool, entry->handle);
@@ -1094,7 +1104,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
 	mutex_unlock(&acomp_ctx->mutex);
 
-	if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool))
+	if (src != acomp_ctx->buffer)
 		zpool_unmap_handle(zpool, entry->handle);
 }
 
@@ -1313,6 +1323,14 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
 	if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
 		return 0;
 
+	/*
+	 * The shrinker resumes swap writeback, which will enter block
+	 * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS
+	 * rules (may_enter_fs()), which apply on a per-folio basis.
+	 */
+	if (!gfp_has_io_fs(sc->gfp_mask))
+		return 0;
+
 #ifdef CONFIG_MEMCG_KMEM
 	mem_cgroup_flush_stats(memcg);
 	nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
@@ -1618,6 +1636,7 @@ bool zswap_load(struct folio *folio)
 	swp_entry_t swp = folio->swap;
 	pgoff_t offset = swp_offset(swp);
 	struct page *page = &folio->page;
+	bool swapcache = folio_test_swapcache(folio);
 	struct zswap_tree *tree = swap_zswap_tree(swp);
 	struct zswap_entry *entry;
 	u8 *dst;
@@ -1630,7 +1649,20 @@ bool zswap_load(struct folio *folio)
 		spin_unlock(&tree->lock);
 		return false;
 	}
-	zswap_rb_erase(&tree->rbroot, entry);
+	/*
+	 * When reading into the swapcache, invalidate our entry. The
+	 * swapcache can be the authoritative owner of the page and
+	 * its mappings, and the pressure that results from having two
+	 * in-memory copies outweighs any benefits of caching the
+	 * compression work.
+	 *
+	 * (Most swapins go through the swapcache. The notable
+	 * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+	 * files, which reads into a private page and may free it if
+	 * the fault fails. We remain the primary owner of the entry.)
+	 */
+	if (swapcache)
+		zswap_rb_erase(&tree->rbroot, entry);
 	spin_unlock(&tree->lock);
 
 	if (entry->length)
@@ -1645,9 +1677,10 @@ bool zswap_load(struct folio *folio)
 	if (entry->objcg)
 		count_objcg_event(entry->objcg, ZSWPIN);
 
-	zswap_entry_free(entry);
-
-	folio_mark_dirty(folio);
+	if (swapcache) {
+		zswap_entry_free(entry);
+		folio_mark_dirty(folio);
+	}
 
 	return true;
 }
```
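The populate_vma_page_range() hunk above reworks how mlock picks its fault flags: an inaccessible (PROT_NONE) VMA now fails early with -EFAULT, a private writable mapping takes a write fault to break COW, and everything else takes a read fault with FOLL_FORCE so write-only or execute-only mappings still populate. A minimal userspace mock of that decision table, using stand-in flag values rather than the kernel's definitions:

```c
#include <assert.h>
#include <errno.h>

/* Stand-in flag values for illustration only -- not the kernel's. */
#define VM_READ   0x1UL
#define VM_WRITE  0x2UL
#define VM_EXEC   0x4UL
#define VM_SHARED 0x8UL

#define FOLL_TOUCH 0x1
#define FOLL_WRITE 0x2
#define FOLL_FORCE 0x4

/* Mirrors the post-patch flag selection in populate_vma_page_range(). */
static int pick_gup_flags(unsigned long vm_flags, int *gup_flags)
{
	/* PROT_NONE regions were never faultable by mlock: fail early. */
	if (!(vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
		return -EFAULT;

	*gup_flags = FOLL_TOUCH;

	if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		*gup_flags |= FOLL_WRITE;  /* private writable: break COW */
	else
		*gup_flags |= FOLL_FORCE;  /* read fault, even if not readable */

	return 0;
}

int main(void)
{
	int flags;

	assert(pick_gup_flags(0, &flags) == -EFAULT);  /* PROT_NONE */
	assert(pick_gup_flags(VM_READ | VM_WRITE, &flags) == 0 && (flags & FOLL_WRITE));
	assert(pick_gup_flags(VM_WRITE | VM_SHARED, &flags) == 0 && (flags & FOLL_FORCE));
	return 0;
}
```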
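The page_owner hunks factor the recursion guard into set_current_in_page_owner()/unset_current_in_page_owner() and also arm it around the kmalloc() in add_stack_record_to_list(), because recording an allocation can itself allocate. A rough userspace analogue of that guard, with a thread-local flag standing in for current->in_page_owner (names here are illustrative, not the kernel's):

```c
#include <stdio.h>
#include <stdlib.h>

/* Per-thread reentrancy flag, playing the role of current->in_page_owner. */
static _Thread_local int in_tracker;

static void track_alloc(size_t size);

/* Allocation wrapper that reports every allocation to the tracker. */
static void *traced_malloc(size_t size)
{
	void *p = malloc(size);

	if (p)
		track_alloc(size);
	return p;
}

static void track_alloc(size_t size)
{
	void *record;

	if (in_tracker)		/* we got here from our own bookkeeping: bail */
		return;

	in_tracker = 1;
	/* Bookkeeping may allocate too; the flag stops infinite recursion. */
	record = traced_malloc(64);
	printf("tracked allocation of %zu bytes (record=%p)\n", size, record);
	free(record);
	in_tracker = 0;
}

int main(void)
{
	free(traced_malloc(128));
	return 0;
}
```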
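The shmem_quota hunks all apply one rule: the rb-tree root hanging off info->dqi_priv is dereferenced only after dqio_sem is held, instead of at declaration time before the lock. A small pthread sketch of the same discipline, sampling a shared root pointer only under its lock; the types and names are made up for illustration:

```c
#include <pthread.h>
#include <stdio.h>

struct node {
	int id;
	struct node *next;
};

/* Shared structure: the root pointer may be replaced under the lock. */
static struct node *root;
static pthread_rwlock_t root_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Return 1 if id is present; the list is only walked under the lock. */
static int lookup(int id)
{
	struct node *n;
	int found = 0;

	pthread_rwlock_rdlock(&root_lock);
	n = root;			/* sample the root only after locking */
	while (n) {
		if (n->id == id) {
			found = 1;
			break;
		}
		n = n->next;
	}
	pthread_rwlock_unlock(&root_lock);
	return found;
}

int main(void)
{
	struct node b = { .id = 2, .next = NULL };
	struct node a = { .id = 1, .next = &b };

	root = &a;
	printf("found id 2: %s\n", lookup(2) ? "yes" : "no");
	return 0;
}
```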
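In the zswap_decompress() hunks, the source may be bounced through acomp_ctx->buffer (when the zpool mapping is atomic or not linearly mapped), and the unmap at the end is now keyed on which pointer was actually used (src != acomp_ctx->buffer) rather than on re-deriving the original condition. A generic sketch of that bounce-buffer-and-matching-cleanup shape; every name below is hypothetical:

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define CHUNK 16

static char backing[CHUNK] = "compressed bits";
static char bounce[CHUNK];		/* preallocated temporary buffer */

/* Stand-ins for mapping/unmapping a handle into the address space. */
static char *map_handle(void)   { return backing; }
static void  unmap_handle(void) { puts("unmapped"); }

/* Pretend the mapping sometimes can't be consumed in place. */
static bool mapping_usable_directly(const char *p) { (void)p; return false; }

static void decompress(void)
{
	char *src = map_handle();

	if (!mapping_usable_directly(src)) {
		/* Bounce: copy out, then drop the mapping early. */
		memcpy(bounce, src, CHUNK);
		src = bounce;
		unmap_handle();
	}

	printf("consuming: %s\n", src);

	/*
	 * Cleanup decides based on what src ended up pointing at,
	 * not by re-evaluating the original condition.
	 */
	if (src != bounce)
		unmap_handle();
}

int main(void)
{
	decompress();
	return 0;
}
```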