diff options
Diffstat (limited to 'fs')
73 files changed, 1574 insertions, 793 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 67afe68972d5..f8622ed72e08 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -533,14 +533,14 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, break; } - offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio); + offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio); size = min_t(loff_t, folio_size(folio), - req->actual_len - folio_file_pos(folio)); + req->actual_len - folio_pos(folio)); do { dblock = kmap_local_folio(folio, offset); ret = afs_dir_iterate_block(dvnode, ctx, dblock, - folio_file_pos(folio) + offset); + folio_pos(folio) + offset); kunmap_local(dblock); if (ret != 1) goto out; diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index e2fa577b66fe..a71bff10496b 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -256,7 +256,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, folio = folio0; } - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio)); /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -417,7 +417,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, folio = folio0; } - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio)); /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -410,17 +410,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, struct kioctx *ctx; unsigned long flags; pgoff_t idx; - int rc; - - /* - * We cannot support the _NO_COPY case here, because copy needs to - * happen under the ctx->completion_lock. That does not work with the - * migration workflow of MIGRATE_SYNC_NO_COPY. - */ - if (mode == MIGRATE_SYNC_NO_COPY) - return -EINVAL; - - rc = 0; + int rc = 0; /* mapping->i_private_lock here protects against the kioctx teardown. */ spin_lock(&mapping->i_private_lock); @@ -465,7 +455,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, * events from being lost. */ spin_lock_irqsave(&ctx->completion_lock, flags); - folio_migrate_copy(dst, src); + folio_copy(dst, src); + folio_migrate_flags(dst, src); BUG_ON(ctx->ring_folios[idx] != src); ctx->ring_folios[idx] = dst; spin_unlock_irqrestore(&ctx->completion_lock, flags); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index cabf866c7956..618d2ff0292e 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -496,12 +496,6 @@ again: for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); alloc_cursor < k.k->p.offset; alloc_cursor++) { - ret = btree_trans_too_many_iters(trans); - if (ret) { - ob = ERR_PTR(ret); - break; - } - s->buckets_seen++; u64 bucket = alloc_cursor & ~(~0ULL << 56); diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index df3763c18c0e..1d6b691e8da6 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -6,15 +6,29 @@ #include <linux/kthread.h> #include <linux/preempt.h> -static inline long io_timer_cmp(io_timer_heap *h, - struct io_timer *l, - struct io_timer *r) +static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args) { - return l->expire - r->expire; + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + return (*_l)->expire < (*_r)->expire; +} + +static inline void io_timer_swp(void *l, void *r, void __always_unused *args) +{ + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + swap(*_l, *_r); } void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) { + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + spin_lock(&clock->timer_lock); if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) { @@ -23,22 +37,27 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) return; } - for (size_t i = 0; i < clock->timers.used; i++) + for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) goto out; - BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); + BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL)); out: spin_unlock(&clock->timer_lock); } void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) { + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + spin_lock(&clock->timer_lock); - for (size_t i = 0; i < clock->timers.used; i++) + for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) { - heap_del(&clock->timers, i, io_timer_cmp, NULL); + min_heap_del(&clock->timers, i, &callbacks, NULL); break; } @@ -123,10 +142,17 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now) { struct io_timer *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + + if (clock->timers.nr && + time_after_eq64(now, clock->timers.data[0]->expire)) { + ret = *min_heap_peek(&clock->timers); + min_heap_pop(&clock->timers, &callbacks, NULL); + } - if (clock->timers.used && - time_after_eq64(now, clock->timers.data[0]->expire)) - heap_pop(&clock->timers, ret, io_timer_cmp, NULL); return ret; } @@ -150,7 +176,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) printbuf_tabstop_push(out, 40); prt_printf(out, "current time:\t%llu\n", now); - for (unsigned i = 0; i < clock->timers.used; i++) + for (unsigned i = 0; i < clock->timers.nr; i++) prt_printf(out, "%ps %ps:\t%llu\n", clock->timers.data[i]->fn, clock->timers.data[i]->fn2, diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h index 9c25d0fcf294..37554e4514fe 100644 --- a/fs/bcachefs/clock_types.h +++ b/fs/bcachefs/clock_types.h @@ -24,7 +24,7 @@ struct io_timer { /* Amount to buffer up on a percpu counter */ #define IO_CLOCK_PCPU_SECTORS 128 -typedef HEAP(struct io_timer *) io_timer_heap; +typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap; struct io_clock { atomic64_t now; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 86948d110f6b..9b5b5c9a6c63 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -901,8 +901,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) mutex_lock(&c->ec_stripes_heap_lock); if (n.size > h->size) { - memcpy(n.data, h->data, h->used * sizeof(h->data[0])); - n.used = h->used; + memcpy(n.data, h->data, h->nr * sizeof(h->data[0])); + n.nr = h->nr; swap(*h, n); } mutex_unlock(&c->ec_stripes_heap_lock); @@ -993,7 +993,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) lockdep_assert_held(&c->ec_stripes_heap_lock); - if (h->used && + if (h->nr && h->data[0].blocks_nonempty == 0 && !bch2_stripe_is_open(c, h->data[0].idx)) return h->data[0].idx; @@ -1001,14 +1001,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) return 0; } -static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, - struct ec_stripe_heap_entry l, - struct ec_stripe_heap_entry r) -{ - return ((l.blocks_nonempty > r.blocks_nonempty) - - (l.blocks_nonempty < r.blocks_nonempty)); -} - static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, size_t i) { @@ -1017,39 +1009,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i; } +static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + + return ((_l->blocks_nonempty > _r->blocks_nonempty) < + (_l->blocks_nonempty < _r->blocks_nonempty)); +} + +static inline void ec_stripes_heap_swap(void *l, void *r, void *h) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + ec_stripes_heap *_h = (ec_stripes_heap *)h; + size_t i = _l - _h->data; + size_t j = _r - _h->data; + + swap(*_l, *_r); + + ec_stripes_heap_set_backpointer(_h, i); + ec_stripes_heap_set_backpointer(_h, j); +} + static void heap_verify_backpointer(struct bch_fs *c, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; struct stripe *m = genradix_ptr(&c->stripes, idx); - BUG_ON(m->heap_idx >= h->used); + BUG_ON(m->heap_idx >= h->nr); BUG_ON(h->data[m->heap_idx].idx != idx); } void bch2_stripes_heap_del(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; + mutex_lock(&c->ec_stripes_heap_lock); heap_verify_backpointer(c, idx); - heap_del(&c->ec_stripes_heap, m->heap_idx, - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap); mutex_unlock(&c->ec_stripes_heap_lock); } void bch2_stripes_heap_insert(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; + mutex_lock(&c->ec_stripes_heap_lock); - BUG_ON(heap_full(&c->ec_stripes_heap)); + BUG_ON(min_heap_full(&c->ec_stripes_heap)); - heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { + genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr; + min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) { .idx = idx, .blocks_nonempty = m->blocks_nonempty, }), - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + &callbacks, + &c->ec_stripes_heap); heap_verify_backpointer(c, idx); mutex_unlock(&c->ec_stripes_heap_lock); @@ -1058,6 +1082,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c, void bch2_stripes_heap_update(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; ec_stripes_heap *h = &c->ec_stripes_heap; bool do_deletes; size_t i; @@ -1068,10 +1096,8 @@ void bch2_stripes_heap_update(struct bch_fs *c, h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; i = m->heap_idx; - heap_sift_up(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); - heap_sift_down(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap); + min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap); heap_verify_backpointer(c, idx); @@ -1864,7 +1890,7 @@ static s64 get_existing_stripe(struct bch_fs *c, return -1; mutex_lock(&c->ec_stripes_heap_lock); - for (heap_idx = 0; heap_idx < h->used; heap_idx++) { + for (heap_idx = 0; heap_idx < h->nr; heap_idx++) { /* No blocks worth reusing, stripe will just be deleted: */ if (!h->data[heap_idx].blocks_nonempty) continue; @@ -2195,7 +2221,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) size_t i; mutex_lock(&c->ec_stripes_heap_lock); - for (i = 0; i < min_t(size_t, h->used, 50); i++) { + for (i = 0; i < min_t(size_t, h->nr, 50); i++) { m = genradix_ptr(&c->stripes, h->data[i].idx); prt_printf(out, "%zu %u/%u+%u", h->data[i].idx, diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index 976426da3a12..1df03dccfc72 100644 --- a/fs/bcachefs/ec_types.h +++ b/fs/bcachefs/ec_types.h @@ -36,6 +36,6 @@ struct ec_stripe_heap_entry { unsigned blocks_nonempty; }; -typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; +typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap; #endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index cc4f0963c0c5..9138944c5ae6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -283,6 +283,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { + struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; struct bch_inode_unpacked lostfound; char name_buf[20]; @@ -317,7 +318,7 @@ static int reattach_inode(struct btree_trans *trans, return ret; } - dir_hash = bch2_hash_info_init(trans->c, &lostfound); + dir_hash = bch2_hash_info_init(c, &lostfound); name = (struct qstr) QSTR(name_buf); @@ -330,8 +331,10 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_subvol ?: inode->bi_inum, &dir_offset, STR_HASH_must_create); - if (ret) + if (ret) { + bch_err_msg(c, ret, "error creating dirent"); return ret; + } inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index d8a630742887..70b998d9f19c 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -206,6 +206,7 @@ void bch2_journal_space_available(struct journal *j) if (nr_online < metadata_replicas_required(c)) { struct printbuf buf = PRINTBUF; + buf.atomic++; prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" "rw journal devs:", nr_online, metadata_replicas_required(c)); diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h index 4fcf062dd22c..47e4a3c3d26e 100644 --- a/fs/bcachefs/mean_and_variance.h +++ b/fs/bcachefs/mean_and_variance.h @@ -111,11 +111,11 @@ static inline u128_u u128_shl(u128_u i, s8 shift) { u128_u r; - r.lo = i.lo << shift; + r.lo = i.lo << (shift & 63); if (shift < 64) - r.hi = (i.hi << shift) | (i.lo >> (64 - shift)); + r.hi = (i.hi << (shift & 63)) | (i.lo >> (-shift & 63)); else { - r.hi = i.lo << (shift - 64); + r.hi = i.lo << (-shift & 63); r.lo = 0; } return r; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 4ec7e44d6e36..138320eaa2ad 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> * Copyright 2012 Google, Inc. diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 2def4f761ca6..902b7f5406a2 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -8,6 +8,7 @@ #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kernel.h> +#include <linux/min_heap.h> #include <linux/sched/clock.h> #include <linux/llist.h> #include <linux/log2.h> @@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len) PAGE_SIZE); } -#define HEAP(type) \ -struct { \ - size_t size, used; \ - type *data; \ -} - -#define DECLARE_HEAP(type, name) HEAP(type) name - #define init_heap(heap, _size, gfp) \ ({ \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ (heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\ (gfp)); \ @@ -76,113 +69,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_set_backpointer(h, i, _fn) \ -do { \ - void (*fn)(typeof(h), size_t) = _fn; \ - if (fn) \ - fn(h, i); \ -} while (0) - -#define heap_swap(h, i, j, set_backpointer) \ -do { \ - swap((h)->data[i], (h)->data[j]); \ - heap_set_backpointer(h, i, set_backpointer); \ - heap_set_backpointer(h, j, set_backpointer); \ -} while (0) - -#define heap_peek(h) \ -({ \ - EBUG_ON(!(h)->used); \ - (h)->data[0]; \ -}) - -#define heap_full(h) ((h)->used == (h)->size) - -#define heap_sift_down(h, i, cmp, set_backpointer) \ -do { \ - size_t _c, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _c) { \ - _c = _j * 2 + 1; \ - if (_c + 1 < (h)->used && \ - cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ - _c++; \ - \ - if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ - break; \ - heap_swap(h, _c, _j, set_backpointer); \ - } \ -} while (0) - -#define heap_sift_up(h, i, cmp, set_backpointer) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ - break; \ - heap_swap(h, i, p, set_backpointer); \ - i = p; \ - } \ -} while (0) - -#define __heap_add(h, d, cmp, set_backpointer) \ -({ \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - heap_set_backpointer(h, _i, set_backpointer); \ - \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - _i; \ -}) - -#define heap_add(h, d, cmp, set_backpointer) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) \ - __heap_add(h, d, cmp, set_backpointer); \ - _r; \ -}) - -#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -do { \ - if (!heap_add(h, new, cmp, set_backpointer) && \ - cmp(h, new, heap_peek(h)) >= 0) { \ - (h)->data[0] = new; \ - heap_set_backpointer(h, 0, set_backpointer); \ - heap_sift_down(h, 0, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_del(h, i, cmp, set_backpointer) \ -do { \ - size_t _i = (i); \ - \ - BUG_ON(_i >= (h)->used); \ - (h)->used--; \ - if ((_i) < (h)->used) { \ - heap_swap(h, _i, (h)->used, set_backpointer); \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - heap_sift_down(h, _i, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_pop(h, d, cmp, set_backpointer) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - heap_del(h, 0, cmp, set_backpointer); \ - } \ - _r; \ -}) - -#define heap_resort(heap, cmp, set_backpointer) \ -do { \ - ssize_t _i; \ - for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ - heap_sift_down(heap, _i, cmp, set_backpointer); \ -} while (0) - #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b592fc8cf368..0533d0f82dc9 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2981,8 +2981,7 @@ static int relocate_one_folio(struct reloc_control *rc, if (folio_test_readahead(folio)) page_cache_async_readahead(inode->i_mapping, ra, NULL, - folio, index, - last_index + 1 - index); + folio, last_index + 1 - index); if (!folio_test_uptodate(folio)) { btrfs_read_folio(NULL, folio); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fb3675f5bf50..4ca711a773ef 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5306,7 +5306,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) if (folio_test_readahead(folio)) page_cache_async_readahead(mapping, &sctx->ra, NULL, folio, - index, last_index + 1 - index); + last_index + 1 - index); if (!folio_test_uptodate(folio)) { btrfs_read_folio(NULL, folio); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 82a2e2a06a65..5aadc56e0cc0 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -141,7 +141,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, if (ptr_pos >= i_size_read(dir)) return NULL; - if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) { + if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) { ceph_readdir_cache_release(cache_ctl); cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); if (!cache_ctl->page) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 249ddfbb1b03..8f8de8f33abb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1863,7 +1863,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, unsigned idx = ctl->index % nsize; pgoff_t pgoff = ctl->index / nsize; - if (!ctl->page || pgoff != page_index(ctl->page)) { + if (!ctl->page || pgoff != ctl->page->index) { ceph_readdir_cache_release(ctl); if (idx == 0) ctl->page = grab_cache_page(&dir->i_data, pgoff); diff --git a/fs/coredump.c b/fs/coredump.c index a57a06b80f57..4dc5140bac3f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -361,17 +361,16 @@ out: return ispipe; } -static int zap_process(struct task_struct *start, int exit_code) +static int zap_process(struct signal_struct *signal, int exit_code) { struct task_struct *t; int nr = 0; - /* Allow SIGKILL, see prepare_signal() */ - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = exit_code; + signal->group_stop_count = 0; - for_each_thread(start, t) { + __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); @@ -391,8 +390,9 @@ static int zap_threads(struct task_struct *tsk, spin_lock_irq(&tsk->sighand->siglock); if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { + /* Allow SIGKILL, see prepare_signal() */ signal->core_state = core_state; - nr = zap_process(tsk, exit_code); + nr = zap_process(signal, exit_code); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); tsk->flags |= PF_DUMPCORE; atomic_set(&core_state->nr_threads, nr); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 81dab95f67ed..9f6cff356796 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -222,13 +222,13 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct hstate *h = hstate_file(file); const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) + if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) { @@ -239,9 +239,10 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (mmap_end - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + vma = find_vma_prev(mm, addr, &prev); + if (mmap_end - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -422,7 +423,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma, if (!ptep) return false; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(vma->vm_mm, addr, ptep); if (huge_pte_none(pte) || !pte_present(pte)) return false; @@ -892,7 +893,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, error = PTR_ERR(folio); goto out; } - clear_huge_page(&folio->page, addr, pages_per_huge_page(h)); + folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size)); __folio_mark_uptodate(folio); error = hugetlb_add_to_page_cache(folio, mapping, index); if (unlikely(error)) { @@ -1128,10 +1129,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, hugetlb_set_folio_subpool(src, NULL); } - if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(dst, src); - else - folio_migrate_flags(dst, src); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 4c0401dbbfcf..a6d5d07cd436 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -271,7 +271,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) kenter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, - folio_file_pos(folio), folio_size(folio), + folio_pos(folio), folio_size(folio), NETFS_READPAGE); if (IS_ERR(rreq)) { ret = PTR_ERR(rreq); @@ -470,7 +470,7 @@ retry: } rreq = netfs_alloc_request(mapping, file, - folio_file_pos(folio), folio_size(folio), + folio_pos(folio), folio_size(folio), NETFS_READ_FOR_WRITE); if (IS_ERR(rreq)) { ret = PTR_ERR(rreq); diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index ecbc99ec7d36..68a3f1383cee 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -54,7 +54,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, { struct netfs_folio *finfo = netfs_folio_info(folio); struct netfs_group *group = netfs_folio_group(folio); - loff_t pos = folio_file_pos(folio); + loff_t pos = folio_pos(folio); kenter(""); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9aa2ab218c0a..61a8cdb9f1e1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -591,7 +591,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", filp, filp->f_mapping->host->i_ino, - (long long)folio_file_pos(folio)); + (long long)folio_pos(folio)); sb_start_pagefault(inode->i_sb); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index b17a9eb9b148..49862c95b224 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -46,6 +46,10 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ #define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats) static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 190c1fa8882c..d074d0ceb4f0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -182,7 +182,7 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset, end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); if (i_size > 0 && folio->index < end_index) goto out; - end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; + end = folio_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) goto out; trace_nfs_size_grow(inode, end); @@ -1344,7 +1344,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, - (long long)(folio_file_pos(folio) + offset)); + (long long)(folio_pos(folio) + offset)); if (!count) goto out; diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 383f0afa2cea..cd14ea25968c 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -450,15 +450,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - key = page_index(bh->b_page) << (PAGE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; - - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6ea81f1d5094..0ca3110d6386 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) @@ -1639,41 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio) folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; - - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, - b_assoc_buffers) { - if (bh->b_folio != bd_folio) { - if (bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - } - bd_folio = bh->b_folio; - } - } - + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_folio != bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - bd_folio = bh->b_folio; - } + if (bh == segbuf->sb_super_root) break; - } set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); @@ -1681,6 +1670,42 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } } } + nilfs_begin_folio_io(fs_folio); + + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + } + bd_folio = bh->b_folio; + } + } + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + if (bd_folio) { folio_lock(bd_folio); folio_wait_writeback(bd_folio); @@ -1688,7 +1713,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) folio_start_writeback(bd_folio); folio_unlock(bd_folio); } - nilfs_begin_folio_io(fs_folio); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -2070,10 +2094,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2824,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_mapping->host, NULL); - err = nilfs_segctor_start_thread(nilfs->ns_writer); if (unlikely(err)) nilfs_detach_log_writer(sb); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed6..a5569b7f47a3 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -967,7 +967,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 8e6bcdf99770..6ede3e924dec 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -231,7 +231,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, struct ntfs_sb_info *sbi; struct ATTRIB *attr_s; struct MFT_REC *rec; - u32 used, asize, rsize, aoff, align; + u32 used, asize, rsize, aoff; bool is_data; CLST len, alen; char *next; @@ -252,10 +252,13 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, rsize = le32_to_cpu(attr->res.data_size); is_data = attr->type == ATTR_DATA && !attr->name_len; - align = sbi->cluster_size; - if (is_attr_compressed(attr)) - align <<= COMPRESSION_UNIT; - len = (rsize + align - 1) >> sbi->cluster_bits; + /* len - how many clusters required to store 'rsize' bytes */ + if (is_attr_compressed(attr)) { + u8 shift = sbi->cluster_bits + NTFS_LZNT_CUNIT; + len = ((rsize + (1u << shift) - 1) >> shift) << NTFS_LZNT_CUNIT; + } else { + len = bytes_to_cluster(sbi, rsize); + } run_init(run); @@ -285,22 +288,21 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, if (err) goto out2; } else if (!page) { - char *kaddr; - - page = grab_cache_page(ni->vfs_inode.i_mapping, 0); - if (!page) { - err = -ENOMEM; + struct address_space *mapping = ni->vfs_inode.i_mapping; + struct folio *folio; + + folio = __filemap_get_folio( + mapping, 0, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out2; } - kaddr = kmap_atomic(page); - memcpy(kaddr, data, rsize); - memset(kaddr + rsize, 0, PAGE_SIZE - rsize); - kunmap_atomic(kaddr); - flush_dcache_page(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - put_page(page); + folio_fill_tail(folio, 0, data, rsize); + folio_mark_uptodate(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + folio_put(folio); } } @@ -670,7 +672,8 @@ pack_runs: goto undo_2; } - if (!is_mft) + /* keep runs for $MFT::$ATTR_DATA and $MFT::$ATTR_BITMAP. */ + if (ni->mi.rno != MFT_REC_MFT) run_truncate_head(run, evcn + 1); svcn = le64_to_cpu(attr->nres.svcn); @@ -972,6 +975,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (err) goto out; + /* Check for compressed frame. */ + err = attr_is_frame_compressed(ni, attr, vcn >> NTFS_LZNT_CUNIT, &hint); + if (err) + goto out; + + if (hint) { + /* if frame is compressed - don't touch it. */ + *lcn = COMPRESSED_LCN; + *len = hint; + err = -EOPNOTSUPP; + goto out; + } + if (!*len) { if (run_lookup_entry(run, vcn, lcn, len, NULL)) { if (*lcn != SPARSE_LCN || !new) @@ -1223,11 +1239,12 @@ undo1: goto out; } -int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) +int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; struct ATTRIB *attr; u32 data_size; + size_t len; attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); if (!attr) @@ -1236,30 +1253,20 @@ int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) if (attr->non_res) return E_NTFS_NONRESIDENT; - vbo = page->index << PAGE_SHIFT; + vbo = folio->index << PAGE_SHIFT; data_size = le32_to_cpu(attr->res.data_size); - if (vbo < data_size) { - const char *data = resident_data(attr); - char *kaddr = kmap_atomic(page); - u32 use = data_size - vbo; - - if (use > PAGE_SIZE) - use = PAGE_SIZE; + if (vbo > data_size) + len = 0; + else + len = min(data_size - vbo, folio_size(folio)); - memcpy(kaddr, data + vbo, use); - memset(kaddr + use, 0, PAGE_SIZE - use); - kunmap_atomic(kaddr); - flush_dcache_page(page); - SetPageUptodate(page); - } else if (!PageUptodate(page)) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); - } + folio_fill_tail(folio, 0, resident_data(attr) + vbo, len); + folio_mark_uptodate(folio); return 0; } -int attr_data_write_resident(struct ntfs_inode *ni, struct page *page) +int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; struct mft_inode *mi; @@ -1275,17 +1282,13 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct page *page) return E_NTFS_NONRESIDENT; } - vbo = page->index << PAGE_SHIFT; + vbo = folio->index << PAGE_SHIFT; data_size = le32_to_cpu(attr->res.data_size); if (vbo < data_size) { char *data = resident_data(attr); - char *kaddr = kmap_atomic(page); - u32 use = data_size - vbo; + size_t len = min(data_size - vbo, folio_size(folio)); - if (use > PAGE_SIZE) - use = PAGE_SIZE; - memcpy(data + vbo, kaddr, use); - kunmap_atomic(kaddr); + memcpy_from_folio(data + vbo, folio, 0, len); mi->dirty = true; } ni->i_valid = data_size; @@ -1378,7 +1381,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, u32 voff; u8 bytes_per_off; char *addr; - struct page *page; + struct folio *folio; int i, err; __le32 *off32; __le64 *off64; @@ -1423,18 +1426,18 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, wof_size = le64_to_cpu(attr->nres.data_size); down_write(&ni->file.run_lock); - page = ni->file.offs_page; - if (!page) { - page = alloc_page(GFP_KERNEL); - if (!page) { + folio = ni->file.offs_folio; + if (!folio) { + folio = folio_alloc(GFP_KERNEL, 0); + if (!folio) { err = -ENOMEM; goto out; } - page->index = -1; - ni->file.offs_page = page; + folio->index = -1; + ni->file.offs_folio = folio; } - lock_page(page); - addr = page_address(page); + folio_lock(folio); + addr = folio_address(folio); if (vbo[1]) { voff = vbo[1] & (PAGE_SIZE - 1); @@ -1450,7 +1453,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, do { pgoff_t index = vbo[i] >> PAGE_SHIFT; - if (index != page->index) { + if (index != folio->index) { + struct page *page = &folio->page; u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1); u64 to = min(from + PAGE_SIZE, wof_size); @@ -1463,10 +1467,10 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, err = ntfs_bio_pages(sbi, run, &page, 1, from, to - from, REQ_OP_READ); if (err) { - page->index = -1; + folio->index = -1; goto out1; } - page->index = index; + folio->index = index; } if (i) { @@ -1504,7 +1508,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, *ondisk_size = off[1] - off[0]; out1: - unlock_page(page); + folio_unlock(folio); out: up_write(&ni->file.run_lock); return err; @@ -1722,6 +1726,7 @@ repack: attr_b->nres.total_size = cpu_to_le64(total_size); inode_set_bytes(&ni->vfs_inode, total_size); + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mi_b->dirty = true; mark_inode_dirty(&ni->vfs_inode); @@ -2356,8 +2361,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; } - if (vbo > data_size) { - /* Insert range after the file size is not allowed. */ + if (vbo >= data_size) { + /* + * Insert range after the file size is not allowed. + * If the offset is equal to or greater than the end of + * file, an error is returned. For such operations (i.e., inserting + * a hole at the end of file), ftruncate(2) should be used. + */ return -EINVAL; } diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index c9eb01ccee51..cf4fe21a5039 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -1382,7 +1382,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); if (err) - break; + return err; bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 1937e8e612f8..fc6a8aa29e3a 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -272,9 +272,12 @@ out: return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode; } -static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, - const struct NTFS_DE *e, u8 *name, - struct dir_context *ctx) +/* + * returns false if 'ctx' if full + */ +static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi, + struct ntfs_inode *ni, const struct NTFS_DE *e, + u8 *name, struct dir_context *ctx) { const struct ATTR_FILE_NAME *fname; unsigned long ino; @@ -284,29 +287,29 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, fname = Add2Ptr(e, sizeof(struct NTFS_DE)); if (fname->type == FILE_NAME_DOS) - return 0; + return true; if (!mi_is_ref(&ni->mi, &fname->home)) - return 0; + return true; ino = ino_get(&e->ref); if (ino == MFT_REC_ROOT) - return 0; + return true; /* Skip meta files. Unless option to show metafiles is set. */ if (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino)) - return 0; + return true; if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN)) - return 0; + return true; name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name, PATH_MAX); if (name_len <= 0) { ntfs_warn(sbi->sb, "failed to convert name for inode %lx.", ino); - return 0; + return true; } /* @@ -326,7 +329,8 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, * It does additional locks/reads just to get the type of name. * Should we use additional mount option to enable branch below? */ - if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) || + fname->dup.ea_size) && ino != ni->mi.rno) { struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); if (!IS_ERR_OR_NULL(inode)) { @@ -335,17 +339,20 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, } } - return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); + return dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } /* * ntfs_read_hdr - Helper function for ntfs_readdir(). + * + * returns 0 if ok. + * returns -EINVAL if directory is corrupted. + * returns +1 if 'ctx' is full. */ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, const struct INDEX_HDR *hdr, u64 vbo, u64 pos, u8 *name, struct dir_context *ctx) { - int err; const struct NTFS_DE *e; u32 e_size; u32 end = le32_to_cpu(hdr->used); @@ -353,12 +360,12 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, for (;; off += e_size) { if (off + sizeof(struct NTFS_DE) > end) - return -1; + return -EINVAL; e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) - return -1; + return -EINVAL; if (de_is_last(e)) return 0; @@ -368,14 +375,15 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, continue; if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME) - return -1; + return -EINVAL; ctx->pos = vbo + off; /* Submit the name to the filldir callback. */ - err = ntfs_filldir(sbi, ni, e, name, ctx); - if (err) - return err; + if (!ntfs_dir_emit(sbi, ni, e, name, ctx)) { + /* ctx is full. */ + return +1; + } } } @@ -474,8 +482,6 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) vbo = (u64)bit << index_bits; if (vbo >= i_size) { - ntfs_inode_err(dir, "Looks like your dir is corrupt"); - ctx->pos = eod; err = -EINVAL; goto out; } @@ -498,9 +504,16 @@ out: __putname(name); put_indx_node(node); - if (err == -ENOENT) { + if (err == 1) { + /* 'ctx' is full. */ + err = 0; + } else if (err == -ENOENT) { err = 0; ctx->pos = pos; + } else if (err < 0) { + if (err == -EINVAL) + ntfs_inode_err(dir, "directory corrupted"); + ctx->pos = eod; } return err; @@ -618,10 +631,12 @@ const struct file_operations ntfs_dir_operations = { #endif }; +#if IS_ENABLED(CONFIG_NTFS_FS) const struct file_operations ntfs_legacy_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = ntfs_readdir, .open = ntfs_file_open, }; +#endif // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 2f903b6ce157..ca1ddc46bd86 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -13,6 +13,7 @@ #include <linux/compat.h> #include <linux/falloc.h> #include <linux/fiemap.h> +#include <linux/fileattr.h> #include "debug.h" #include "ntfs.h" @@ -48,6 +49,65 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) return 0; } +/* + * ntfs_fileattr_get - inode_operations::fileattr_get + */ +int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct ntfs_inode *ni = ntfs_i(inode); + u32 flags = 0; + + if (inode->i_flags & S_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + + if (inode->i_flags & S_APPEND) + flags |= FS_APPEND_FL; + + if (is_compressed(ni)) + flags |= FS_COMPR_FL; + + if (is_encrypted(ni)) + flags |= FS_ENCRYPT_FL; + + fileattr_fill_flags(fa, flags); + + return 0; +} + +/* + * ntfs_fileattr_set - inode_operations::fileattr_set + */ +int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + u32 flags = fa->flags; + unsigned int new_fl = 0; + + if (fileattr_has_fsx(fa)) + return -EOPNOTSUPP; + + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL)) + return -EOPNOTSUPP; + + if (flags & FS_IMMUTABLE_FL) + new_fl |= S_IMMUTABLE; + + if (flags & FS_APPEND_FL) + new_fl |= S_APPEND; + + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND); + + inode_set_ctime_current(inode); + mark_inode_dirty(inode); + + return 0; +} + +/* + * ntfs_ioctl - file_operations::unlocked_ioctl + */ long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -77,20 +137,27 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct inode *inode = d_inode(path->dentry); struct ntfs_inode *ni = ntfs_i(inode); + stat->result_mask |= STATX_BTIME; + stat->btime = ni->i_crtime; + stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ + + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (is_compressed(ni)) stat->attributes |= STATX_ATTR_COMPRESSED; if (is_encrypted(ni)) stat->attributes |= STATX_ATTR_ENCRYPTED; - stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; + stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND; generic_fillattr(idmap, request_mask, inode, stat); - stat->result_mask |= STATX_BTIME; - stat->btime = ni->i_crtime; - stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ - return 0; } @@ -196,9 +263,9 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) PAGE_SIZE; iblock = page_off >> inode->i_blkbits; - folio = __filemap_get_folio(mapping, idx, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping_gfp_constraint(mapping, ~__GFP_FS)); + folio = __filemap_get_folio( + mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -253,8 +320,7 @@ out: */ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT); bool rw = vma->vm_flags & VM_WRITE; @@ -299,10 +365,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) } if (ni->i_valid < to) { - if (!inode_trylock(inode)) { - err = -EAGAIN; - goto out; - } + inode_lock(inode); err = ntfs_extend_initialized_size(file, ni, ni->i_valid, to); inode_unlock(inode); @@ -431,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) */ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) { - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; struct super_block *sb = inode->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; @@ -744,7 +807,7 @@ out: static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) @@ -781,7 +844,7 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - struct inode *inode = in->f_mapping->host; + struct inode *inode = file_inode(in); struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) @@ -824,23 +887,25 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index, *frame_uptodate = true; for (npages = 0; npages < pages_per_frame; npages++, index++) { - struct page *page; + struct folio *folio; - page = find_or_create_page(mapping, index, gfp_mask); - if (!page) { + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + gfp_mask); + if (IS_ERR(folio)) { while (npages--) { - page = pages[npages]; - unlock_page(page); - put_page(page); + folio = page_folio(pages[npages]); + folio_unlock(folio); + folio_put(folio); } return -ENOMEM; } - if (!PageUptodate(page)) + if (!folio_test_uptodate(folio)) *frame_uptodate = false; - pages[npages] = page; + pages[npages] = &folio->page; } return 0; @@ -1075,8 +1140,7 @@ out: static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = file_inode(file); ssize_t ret; int err; struct ntfs_inode *ni = ntfs_i(inode); @@ -1198,7 +1262,7 @@ static int ntfs_file_release(struct inode *inode, struct file *file) } /* - * ntfs_fiemap - file_operations::fiemap + * ntfs_fiemap - inode_operations::fiemap */ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) @@ -1227,6 +1291,8 @@ const struct inode_operations ntfs_file_inode_operations = { .get_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, + .fileattr_set = ntfs_fileattr_set, }; const struct file_operations ntfs_file_operations = { @@ -1246,6 +1312,7 @@ const struct file_operations ntfs_file_operations = { .release = ntfs_file_release, }; +#if IS_ENABLED(CONFIG_NTFS_FS) const struct file_operations ntfs_legacy_file_operations = { .llseek = generic_file_llseek, .read_iter = ntfs_file_read_iter, @@ -1253,4 +1320,5 @@ const struct file_operations ntfs_legacy_file_operations = { .open = ntfs_file_open, .release = ntfs_file_release, }; +#endif // clang-format on diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 0008670939a4..a469c608a394 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -122,10 +122,10 @@ void ni_clear(struct ntfs_inode *ni) else { run_close(&ni->file.run); #ifdef CONFIG_NTFS3_LZX_XPRESS - if (ni->file.offs_page) { + if (ni->file.offs_folio) { /* On-demand allocated page for offsets. */ - put_page(ni->file.offs_page); - ni->file.offs_page = NULL; + folio_put(ni->file.offs_folio); + ni->file.offs_folio = NULL; } #endif } @@ -1501,7 +1501,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, if (is_ext) { if (flags & ATTR_FLAG_COMPRESSED) - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; attr->nres.total_size = attr->nres.alloc_size; } @@ -1601,8 +1601,10 @@ int ni_delete_all(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); roff = le16_to_cpu(attr->nres.run_off); - if (roff > asize) + if (roff > asize) { + _ntfs_bad_inode(&ni->vfs_inode); return -EINVAL; + } /* run==1 means unpack and deallocate. */ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, @@ -1897,6 +1899,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, } /* + * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent + * but it accepts kernel address for fi_extents_start + */ +static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo, + u64 logical, u64 phys, u64 len, u32 flags) +{ + struct fiemap_extent extent; + struct fiemap_extent __user *dest = fieinfo->fi_extents_start; + + /* only count the extents */ + if (fieinfo->fi_extents_max == 0) { + fieinfo->fi_extents_mapped++; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; + } + + if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) + return 1; + + if (flags & FIEMAP_EXTENT_DELALLOC) + flags |= FIEMAP_EXTENT_UNKNOWN; + if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) + flags |= FIEMAP_EXTENT_ENCODED; + if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE)) + flags |= FIEMAP_EXTENT_NOT_ALIGNED; + + memset(&extent, 0, sizeof(extent)); + extent.fe_logical = logical; + extent.fe_physical = phys; + extent.fe_length = len; + extent.fe_flags = flags; + + dest += fieinfo->fi_extents_mapped; + memcpy(dest, &extent, sizeof(extent)); + + fieinfo->fi_extents_mapped++; + if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) + return 1; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +} + +/* * ni_fiemap - Helper for file_fiemap(). * * Assumed ni_lock. @@ -1906,6 +1949,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len) { int err = 0; + struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start; + struct fiemap_extent *fe_k = NULL; struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; struct runs_tree *run; @@ -1953,6 +1998,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, goto out; } + /* + * To avoid lock problems replace pointer to user memory by pointer to kernel memory. + */ + fe_k = kmalloc_array(fieinfo->fi_extents_max, + sizeof(struct fiemap_extent), + GFP_NOFS | __GFP_ZERO); + if (!fe_k) { + err = -ENOMEM; + goto out; + } + fieinfo->fi_extents_start = fe_k; + end = vbo + len; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (end > alloc_size) @@ -2041,8 +2098,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + dlen >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, - flags); + err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen, + flags); + if (err < 0) break; if (err == 1) { @@ -2062,7 +2120,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + bytes >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); + err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes, + flags); if (err < 0) break; if (err == 1) { @@ -2075,7 +2134,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, up_read(run_lock); + /* + * Copy to user memory out of lock + */ + if (copy_to_user(fe_u, fe_k, + fieinfo->fi_extents_max * + sizeof(struct fiemap_extent))) { + err = -EFAULT; + } + out: + /* Restore original pointer. */ + fieinfo->fi_extents_start = fe_u; + kfree(fe_k); return err; } @@ -2085,12 +2156,12 @@ out: * When decompressing, we typically obtain more than one page per reference. * We inject the additional pages into the page cache. */ -int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) +int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio) { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; - struct address_space *mapping = page->mapping; - pgoff_t index = page->index; + struct address_space *mapping = folio->mapping; + pgoff_t index = folio->index; u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT; struct page **pages = NULL; /* Array of at most 16 pages. stack? */ u8 frame_bits; @@ -2100,7 +2171,8 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) struct page *pg; if (vbo >= i_size_read(&ni->vfs_inode)) { - SetPageUptodate(page); + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); err = 0; goto out; } @@ -2124,7 +2196,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) goto out; } - pages[idx] = page; + pages[idx] = &folio->page; index = frame_vbo >> PAGE_SHIFT; gfp_mask = mapping_gfp_mask(mapping); @@ -2143,9 +2215,6 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame); out1: - if (err) - SetPageError(page); - for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; if (i == idx || !pg) @@ -2157,7 +2226,7 @@ out1: out: /* At this point, err contains 0 or -EIO depending on the "critical" page. */ kfree(pages); - unlock_page(page); + folio_unlock(folio); return err; } @@ -2362,9 +2431,9 @@ remove_wof: /* Clear cached flag. */ ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK; - if (ni->file.offs_page) { - put_page(ni->file.offs_page); - ni->file.offs_page = NULL; + if (ni->file.offs_folio) { + folio_put(ni->file.offs_folio); + ni->file.offs_folio = NULL; } mapping->a_ops = &ntfs_aops; @@ -2718,7 +2787,6 @@ out: for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; kunmap(pg); - ClearPageError(pg); SetPageUptodate(pg); } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index d7807d255dfe..c64dd114ac65 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -724,7 +724,8 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes) if (!rsize || rsize > bytes || rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts || - le16_to_cpu(rt->total) > ne || ff > ts || lf > ts || + le16_to_cpu(rt->total) > ne || ff > ts - sizeof(__le32) || + lf > ts - sizeof(__le32) || (ff && ff < sizeof(struct RESTART_TABLE)) || (lf && lf < sizeof(struct RESTART_TABLE))) { return false; @@ -754,6 +755,9 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes) return false; off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off)); + + if (off > ts - sizeof(__le32)) + return false; } return true; @@ -2992,7 +2996,7 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi, if (is_ext) { attr->name_off = SIZEOF_NONRESIDENT_EX_LE; if (is_attr_compressed(attr)) - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; attr->nres.run_off = cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size); @@ -3722,6 +3726,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) u64 rec_lsn, checkpt_lsn = 0, rlsn = 0; struct ATTR_NAME_ENTRY *attr_names = NULL; + u32 attr_names_bytes = 0; + u32 oatbl_bytes = 0; struct RESTART_TABLE *dptbl = NULL; struct RESTART_TABLE *trtbl = NULL; const struct RESTART_TABLE *rt; @@ -3736,6 +3742,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct NTFS_RESTART *rst = NULL; struct lcb *lcb = NULL; struct OPEN_ATTR_ENRTY *oe; + struct ATTR_NAME_ENTRY *ane; struct TRANSACTION_ENTRY *tr; struct DIR_PAGE_ENTRY *dp; u32 i, bytes_per_attr_entry; @@ -3915,6 +3922,9 @@ check_restart_area: goto out; } + log->page_mask = log->page_size - 1; + log->page_bits = blksize_bits(log->page_size); + /* If the file size has shrunk then we won't mount it. */ if (log->l_size < le64_to_cpu(ra2->l_size)) { err = -EINVAL; @@ -4104,7 +4114,7 @@ process_log: /* Allocate and Read the Transaction Table. */ if (!rst->transact_table_len) - goto check_dirty_page_table; + goto check_dirty_page_table; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->transact_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4144,7 +4154,7 @@ process_log: check_dirty_page_table: /* The next record back should be the Dirty Pages Table. */ if (!rst->dirty_pages_len) - goto check_attribute_names; + goto check_attribute_names; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->dirty_pages_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4180,7 +4190,7 @@ check_dirty_page_table: /* Convert Ra version '0' into version '1'. */ if (rst->major_ver) - goto end_conv_1; + goto end_conv_1; /* reduce tab pressure. */ dp = NULL; while ((dp = enum_rstbl(dptbl, dp))) { @@ -4200,8 +4210,7 @@ end_conv_1: * remembering the oldest lsn values. */ if (sbi->cluster_size <= log->page_size) - goto trace_dp_table; - + goto trace_dp_table; /* reduce tab pressure. */ dp = NULL; while ((dp = enum_rstbl(dptbl, dp))) { struct DIR_PAGE_ENTRY *next = dp; @@ -4222,7 +4231,7 @@ trace_dp_table: check_attribute_names: /* The next record should be the Attribute Names. */ if (!rst->attr_names_len) - goto check_attr_table; + goto check_attr_table; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->attr_names_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4240,9 +4249,9 @@ check_attribute_names: } t32 = lrh_length(lrh); - rec_len -= t32; + attr_names_bytes = rec_len - t32; - attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS); + attr_names = kmemdup(Add2Ptr(lrh, t32), attr_names_bytes, GFP_NOFS); if (!attr_names) { err = -ENOMEM; goto out; @@ -4254,7 +4263,7 @@ check_attribute_names: check_attr_table: /* The next record should be the attribute Table. */ if (!rst->open_attr_len) - goto check_attribute_names2; + goto check_attribute_names2; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->open_attr_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4274,14 +4283,14 @@ check_attr_table: t16 = le16_to_cpu(lrh->redo_off); rt = Add2Ptr(lrh, t16); - t32 = rec_len - t16; + oatbl_bytes = rec_len - t16; - if (!check_rstbl(rt, t32)) { + if (!check_rstbl(rt, oatbl_bytes)) { err = -EINVAL; goto out; } - oatbl = kmemdup(rt, t32, GFP_NOFS); + oatbl = kmemdup(rt, oatbl_bytes, GFP_NOFS); if (!oatbl) { err = -ENOMEM; goto out; @@ -4314,17 +4323,40 @@ check_attr_table: lcb = NULL; check_attribute_names2: - if (rst->attr_names_len && oatbl) { - struct ATTR_NAME_ENTRY *ane = attr_names; - while (ane->off) { + if (attr_names && oatbl) { + off = 0; + for (;;) { + /* Check we can use attribute name entry 'ane'. */ + static_assert(sizeof(*ane) == 4); + if (off + sizeof(*ane) > attr_names_bytes) { + /* just ignore the rest. */ + break; + } + + ane = Add2Ptr(attr_names, off); + t16 = le16_to_cpu(ane->off); + if (!t16) { + /* this is the only valid exit. */ + break; + } + + /* Check we can use open attribute entry 'oe'. */ + if (t16 + sizeof(*oe) > oatbl_bytes) { + /* just ignore the rest. */ + break; + } + /* TODO: Clear table on exit! */ - oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); + oe = Add2Ptr(oatbl, t16); t16 = le16_to_cpu(ane->name_bytes); + off += t16 + sizeof(*ane); + if (off > attr_names_bytes) { + /* just ignore the rest. */ + break; + } oe->name_len = t16 / sizeof(short); oe->ptr = ane->name; oe->is_attr_name = 2; - ane = Add2Ptr(ane, - sizeof(struct ATTR_NAME_ENTRY) + t16); } } @@ -4520,7 +4552,6 @@ copy_lcns: } } goto next_log_record_analyze; - ; } case OpenNonresidentAttribute: @@ -4659,7 +4690,7 @@ end_log_records_enumerate: * table are not empty. */ if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total)) - goto end_reply; + goto end_replay; sbi->flags |= NTFS_FLAGS_NEED_REPLAY; if (is_ro) @@ -5088,7 +5119,7 @@ undo_action_done: sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY; -end_reply: +end_replay: err = 0; if (is_ro) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 626d3f2c7e2d..0fa636038b4e 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2650,8 +2650,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) { int err; struct ATTRIB *attr; + u32 uni_bytes; struct ntfs_inode *ni = sbi->volume.ni; - const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */ /* Allocate PATH_MAX bytes. */ struct cpu_str *uni = __getname(); @@ -2663,7 +2663,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) if (err < 0) goto out; - if (uni->len > max_ulen) { + uni_bytes = uni->len * sizeof(u16); + if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) { ntfs_warn(sbi->sb, "new label is too long"); err = -EFBIG; goto out; @@ -2674,13 +2675,13 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) /* Ignore any errors. */ ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL); - err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL, - 0, &attr, NULL, NULL); + err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr, + NULL, NULL); if (err < 0) goto unlock_out; /* write new label in on-disk struct. */ - memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16)); + memcpy(resident_data(attr), uni->name, uni_bytes); /* update cached value of current label. */ if (len >= ARRAY_SIZE(sbi->volume.label)) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index d0f15bbf78f6..9089c58a005c 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -978,7 +978,7 @@ static struct indx_node *indx_new(struct ntfs_index *indx, hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64)); de_set_vbn_le(e, *sub_vbn); - hdr->flags = 1; + hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; } else { e->size = cpu_to_le16(sizeof(struct NTFS_DE)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE)); @@ -1683,7 +1683,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST; - hdr->flags = 1; + hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; hdr->used = hdr->total = cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr)); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 0f1664db94ad..6b0bdc474e76 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -18,7 +18,7 @@ #include "ntfs_fs.h" /* - * ntfs_read_mft - Read record and parses MFT. + * ntfs_read_mft - Read record and parse MFT. */ static struct inode *ntfs_read_mft(struct inode *inode, const struct cpu_str *name, @@ -441,10 +441,9 @@ end_enum: * Usually a hard links to directories are disabled. */ inode->i_op = &ntfs_dir_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_dir_operations; - else - inode->i_fop = &ntfs_dir_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_dir_operations : + &ntfs_dir_operations; ni->i_valid = 0; } else if (S_ISLNK(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; @@ -454,10 +453,9 @@ end_enum: } else if (S_ISREG(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; inode->i_op = &ntfs_file_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_file_operations; - else - inode->i_fop = &ntfs_file_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_file_operations : + &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; if (ino != MFT_REC_MFT) @@ -580,10 +578,11 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bh->b_blocknr = RESIDENT_LCN; bh->b_size = block_size; if (!folio) { + /* direct io (read) or bmap call */ err = 0; } else { ni_lock(ni); - err = attr_data_read_resident(ni, &folio->page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (!err) @@ -710,25 +709,24 @@ static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) static int ntfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; int err; - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); if (is_resident(ni)) { ni_lock(ni); - err = attr_data_read_resident(ni, page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (err != E_NTFS_NONRESIDENT) { - unlock_page(page); + folio_unlock(folio); return err; } } if (is_compressed(ni)) { ni_lock(ni); - err = ni_readpage_cmpr(ni, page); + err = ni_readpage_cmpr(ni, folio); ni_unlock(ni); return err; } @@ -872,7 +870,7 @@ static int ntfs_resident_writepage(struct folio *folio, return -EIO; ni_lock(ni); - ret = attr_data_write_resident(ni, &folio->page); + ret = attr_data_write_resident(ni, folio); ni_unlock(ni); if (ret != E_NTFS_NONRESIDENT) @@ -914,24 +912,25 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping, *pagep = NULL; if (is_resident(ni)) { - struct page *page = - grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT); + struct folio *folio = __filemap_get_folio( + mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); - if (!page) { - err = -ENOMEM; + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } ni_lock(ni); - err = attr_data_read_resident(ni, page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (!err) { - *pagep = page; + *pagep = &folio->page; goto out; } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (err != E_NTFS_NONRESIDENT) goto out; @@ -950,6 +949,7 @@ out: int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, u32 len, u32 copied, struct page *page, void *fsdata) { + struct folio *folio = page_folio(page); struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); u64 valid = ni->i_valid; @@ -958,26 +958,26 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, if (is_resident(ni)) { ni_lock(ni); - err = attr_data_write_resident(ni, page); + err = attr_data_write_resident(ni, folio); ni_unlock(ni); if (!err) { + struct buffer_head *head = folio_buffers(folio); dirty = true; - /* Clear any buffers in page. */ - if (page_has_buffers(page)) { - struct buffer_head *head, *bh; + /* Clear any buffers in folio. */ + if (head) { + struct buffer_head *bh = head; - bh = head = page_buffers(page); do { clear_buffer_dirty(bh); clear_buffer_mapped(bh); set_buffer_uptodate(bh); } while (head != (bh = bh->b_this_page)); } - SetPageUptodate(page); + folio_mark_uptodate(folio); err = copied; } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } else { err = generic_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1093,33 +1093,31 @@ int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, if (!ret && i2) ret = writeback_inode(i2); if (!ret) - ret = sync_blockdev_nowait(sb->s_bdev); + ret = filemap_flush(sb->s_bdev_file->f_mapping); return ret; } -int inode_write_data(struct inode *inode, const void *data, size_t bytes) +/* + * Helper function to read file. + */ +int inode_read_data(struct inode *inode, void *data, size_t bytes) { pgoff_t idx; + struct address_space *mapping = inode->i_mapping; - /* Write non resident data. */ for (idx = 0; bytes; idx++) { size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes; - struct page *page = ntfs_map_page(inode->i_mapping, idx); + struct page *page = read_mapping_page(mapping, idx, NULL); + void *kaddr; if (IS_ERR(page)) return PTR_ERR(page); - lock_page(page); - WARN_ON(!PageUptodate(page)); - ClearPageUptodate(page); - - memcpy(page_address(page), data, op); - - flush_dcache_page(page); - SetPageUptodate(page); - unlock_page(page); + kaddr = kmap_atomic(page); + memcpy(data, kaddr, op); + kunmap_atomic(kaddr); - ntfs_unmap_page(page); + put_page(page); bytes -= op; data = Add2Ptr(data, PAGE_SIZE); @@ -1508,7 +1506,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8); attr->name_off = SIZEOF_NONRESIDENT_EX_LE; attr->flags = ATTR_FLAG_COMPRESSED; - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; asize = SIZEOF_NONRESIDENT_EX + 8; } else { attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8); @@ -1559,7 +1557,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, /* * Below function 'ntfs_save_wsl_perm' requires 0x78 bytes. - * It is good idea to keep extened attributes resident. + * It is good idea to keep extended attributes resident. */ if (asize + t16 + 0x78 + 8 > sbi->record_size) { CLST alen; @@ -1628,10 +1626,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, if (S_ISDIR(mode)) { inode->i_op = &ntfs_dir_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_dir_operations; - else - inode->i_fop = &ntfs_dir_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_dir_operations : + &ntfs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &ntfs_link_inode_operations; inode->i_fop = NULL; @@ -1640,10 +1637,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode_nohighmem(inode); } else if (S_ISREG(mode)) { inode->i_op = &ntfs_file_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_file_operations; - else - inode->i_fop = &ntfs_file_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_file_operations : + &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; init_rwsem(&ni->file.run_lock); @@ -1668,7 +1664,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, * The packed size of extended attribute is stored in direntry too. * 'fname' here points to inside new_de. */ - ntfs_save_wsl_perm(inode, &fname->dup.ea_size); + err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size); + if (err) + goto out6; /* * update ea_size in file_name attribute too. @@ -1712,6 +1710,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, goto out2; out6: + attr = ni_find_attr(ni, NULL, NULL, ATTR_EA, NULL, 0, NULL, NULL); + if (attr && attr->non_res) { + /* Delete ATTR_EA, if non-resident. */ + attr_set_size(ni, ATTR_EA, NULL, 0, NULL, 0, NULL, false, NULL); + } + if (rp_inserted) ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); @@ -2133,5 +2137,6 @@ const struct address_space_operations ntfs_aops = { const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, + .dirty_folio = block_dirty_folio, }; // clang-format on diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index 71498421ce60..f16d318c4372 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -112,9 +112,7 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir, } /* - * ntfs_mknod - * - * inode_operations::mknod + * ntfs_mknod - inode_operations::mknod */ static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) @@ -509,6 +507,8 @@ const struct inode_operations ntfs_dir_inode_operations = { .getattr = ntfs_getattr, .listxattr = ntfs_listxattr, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, + .fileattr_set = ntfs_fileattr_set, }; const struct inode_operations ntfs_special_inode_operations = { diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 3d6143c7abc0..241f2ffdd920 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -82,9 +82,6 @@ typedef u32 CLST; #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) -#define COMPRESSION_UNIT 4 -#define COMPRESS_MAX_CLUSTER 0x1000 - enum RECORD_NUM { MFT_REC_MFT = 0, MFT_REC_MIRR = 1, @@ -696,14 +693,15 @@ static inline bool de_has_vcn_ex(const struct NTFS_DE *e) offsetof(struct ATTR_FILE_NAME, name) + \ NTFS_NAME_LEN * sizeof(short), 8) +#define NTFS_INDEX_HDR_HAS_SUBNODES cpu_to_le32(1) + struct INDEX_HDR { __le32 de_off; // 0x00: The offset from the start of this structure // to the first NTFS_DE. __le32 used; // 0x04: The size of this structure plus all // entries (quad-word aligned). __le32 total; // 0x08: The allocated size of for this structure plus all entries. - u8 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory. - u8 res[3]; + __le32 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory. // // de_off + used <= total @@ -751,7 +749,7 @@ static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr, static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr) { - return hdr->flags & 1; + return hdr->flags & NTFS_INDEX_HDR_HAS_SUBNODES; } struct INDEX_BUFFER { @@ -771,7 +769,7 @@ static inline bool ib_is_empty(const struct INDEX_BUFFER *ib) static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib) { - return !(ib->ihdr.flags & 1); + return !(ib->ihdr.flags & NTFS_INDEX_HDR_HAS_SUBNODES); } /* Index root structure ( 0x90 ). */ @@ -1002,9 +1000,6 @@ struct REPARSE_POINT { static_assert(sizeof(struct REPARSE_POINT) == 0x18); -/* Maximum allowed size of the reparse data. */ -#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE (16 * 1024) - /* * The value of the following constant needs to satisfy the following * conditions: diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index f9ed6d2b065d..e5255a251929 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -383,7 +383,7 @@ struct ntfs_inode { struct rw_semaphore run_lock; struct runs_tree run; #ifdef CONFIG_NTFS3_LZX_XPRESS - struct page *offs_page; + struct folio *offs_folio; #endif } file; }; @@ -434,8 +434,8 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, struct ATTRIB **ret); int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, CLST *len, bool *new, bool zero); -int attr_data_read_resident(struct ntfs_inode *ni, struct page *page); -int attr_data_write_resident(struct ntfs_inode *ni, struct page *page); +int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio); +int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct runs_tree *run, CLST vcn); @@ -497,6 +497,9 @@ extern const struct file_operations ntfs_dir_operations; extern const struct file_operations ntfs_legacy_dir_operations; /* Globals from file.c */ +int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *fa); int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, @@ -564,7 +567,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint); #define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len); -int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page); +int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio); int ni_decompress_file(struct ntfs_inode *ni); int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, u32 pages_per_frame); @@ -716,7 +719,7 @@ int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); int ntfs_sync_inode(struct inode *inode); int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); -int inode_write_data(struct inode *inode, const void *data, size_t bytes); +int inode_read_data(struct inode *inode, void *data, size_t bytes); int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, @@ -910,22 +913,6 @@ static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno) rno == sbi->usn_jrnl_no; } -static inline void ntfs_unmap_page(struct page *page) -{ - kunmap(page); - put_page(page); -} - -static inline struct page *ntfs_map_page(struct address_space *mapping, - unsigned long index) -{ - struct page *page = read_mapping_page(mapping, index, NULL); - - if (!IS_ERR(page)) - kmap(page); - return page; -} - static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd) { return wnd->zone_bit; @@ -1156,6 +1143,13 @@ static inline void le64_sub_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) - val); } +#if IS_ENABLED(CONFIG_NTFS_FS) bool is_legacy_ntfs(struct super_block *sb); +#else +static inline bool is_legacy_ntfs(struct super_block *sb) +{ + return false; +} +#endif #endif /* _LINUX_NTFS3_NTFS_FS_H */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index c5b688c5f984..a8758b85803f 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -275,7 +275,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag_no("acl", Opt_acl), fsparam_string("iocharset", Opt_iocharset), fsparam_flag_no("prealloc", Opt_prealloc), - fsparam_flag_no("nocase", Opt_nocase), + fsparam_flag_no("case", Opt_nocase), {} }; // clang-format on @@ -464,7 +464,7 @@ static int ntfs3_volinfo(struct seq_file *m, void *o) struct super_block *sb = m->private; struct ntfs_sb_info *sbi = sb->s_fs_info; - seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n", + seq_printf(m, "ntfs%d.%d\n%u\n%zu\n%zu\n%zu\n%s\n%s\n", sbi->volume.major_ver, sbi->volume.minor_ver, sbi->cluster_size, sbi->used.bitmap.nbits, sbi->mft.bitmap.nbits, @@ -1159,7 +1159,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) CLST vcn, lcn, len; struct ATTRIB *attr; const struct VOLUME_INFO *info; - u32 idx, done, bytes; + u32 done, bytes; struct ATTR_DEF_ENTRY *t; u16 *shared; struct MFT_REF ref; @@ -1201,7 +1201,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) /* * Load $Volume. This should be done before $LogFile - * 'cause 'sbi->volume.ni' is used 'ntfs_set_state'. + * 'cause 'sbi->volume.ni' is used in 'ntfs_set_state'. */ ref.low = cpu_to_le32(MFT_REC_VOL); ref.seq = cpu_to_le16(MFT_REC_VOL); @@ -1431,31 +1431,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode_out; } - for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) { - unsigned long tail = bytes - done; - struct page *page = ntfs_map_page(inode->i_mapping, idx); + /* Read the entire file. */ + err = inode_read_data(inode, sbi->def_table, bytes); + if (err) { + ntfs_err(sb, "Failed to read $AttrDef (%d).", err); + goto put_inode_out; + } - if (IS_ERR(page)) { - err = PTR_ERR(page); - ntfs_err(sb, "Failed to read $AttrDef (%d).", err); - goto put_inode_out; - } - memcpy(Add2Ptr(t, done), page_address(page), - min(PAGE_SIZE, tail)); - ntfs_unmap_page(page); - - if (!idx && ATTR_STD != t->type) { - ntfs_err(sb, "$AttrDef is corrupted."); - err = -EINVAL; - goto put_inode_out; - } + if (ATTR_STD != t->type) { + ntfs_err(sb, "$AttrDef is corrupted."); + err = -EINVAL; + goto put_inode_out; } t += 1; sbi->def_entries = 1; done = sizeof(struct ATTR_DEF_ENTRY); - sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE; - sbi->ea_max_size = 0x10000; /* default formatter value */ while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) { u32 t32 = le32_to_cpu(t->type); @@ -1491,27 +1482,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode_out; } - for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) { - const __le16 *src; - u16 *dst = Add2Ptr(sbi->upcase, idx << PAGE_SHIFT); - struct page *page = ntfs_map_page(inode->i_mapping, idx); - - if (IS_ERR(page)) { - err = PTR_ERR(page); - ntfs_err(sb, "Failed to read $UpCase (%d).", err); - goto put_inode_out; - } - - src = page_address(page); + /* Read the entire file. */ + err = inode_read_data(inode, sbi->upcase, 0x10000 * sizeof(short)); + if (err) { + ntfs_err(sb, "Failed to read $UpCase (%d).", err); + goto put_inode_out; + } #ifdef __BIG_ENDIAN - for (i = 0; i < PAGE_SIZE / sizeof(u16); i++) + { + const __le16 *src = sbi->upcase; + u16 *dst = sbi->upcase; + + for (i = 0; i < 0x10000; i++) *dst++ = le16_to_cpu(*src++); -#else - memcpy(dst, src, PAGE_SIZE); -#endif - ntfs_unmap_page(page); } +#endif shared = ntfs_set_shared(sbi->upcase, 0x10000 * sizeof(short)); if (shared && sbi->upcase != shared) { @@ -1847,10 +1833,8 @@ bool is_legacy_ntfs(struct super_block *sb) #else static inline void register_as_ntfs_legacy(void) {} static inline void unregister_as_ntfs_legacy(void) {} -bool is_legacy_ntfs(struct super_block *sb) { return false; } #endif - // clang-format on static int __init init_ntfs_fs(void) @@ -1876,8 +1860,7 @@ static int __init init_ntfs_fs(void) ntfs_inode_cachep = kmem_cache_create( "ntfs_inode_cache", sizeof(struct ntfs_inode), 0, - (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), - init_once); + (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once); if (!ntfs_inode_cachep) { err = -ENOMEM; goto out1; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 73785dece7a7..0703e1ae32b2 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -195,10 +195,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, { const struct EA_INFO *info; struct EA_FULL *ea_all = NULL; - const struct EA_FULL *ea; u32 off, size; int err; - int ea_size; size_t ret; err = ntfs_read_ea(ni, &ea_all, 0, &info); @@ -212,16 +210,18 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, /* Enumerate all xattrs. */ ret = 0; - for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { - ea = Add2Ptr(ea_all, off); - ea_size = unpacked_ea_size(ea); + off = 0; + while (off + sizeof(struct EA_FULL) < size) { + const struct EA_FULL *ea = Add2Ptr(ea_all, off); + int ea_size = unpacked_ea_size(ea); + u8 name_len = ea->name_len; - if (!ea->name_len) + if (!name_len) break; - if (ea->name_len > ea_size) { + if (name_len > ea_size) { ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); - err = -EINVAL; /* corrupted fs */ + err = -EINVAL; /* corrupted fs. */ break; } @@ -230,16 +230,17 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (off + ea_size > size) break; - if (ret + ea->name_len + 1 > bytes_per_buffer) { + if (ret + name_len + 1 > bytes_per_buffer) { err = -ERANGE; goto out; } - memcpy(buffer + ret, ea->name, ea->name_len); - buffer[ret + ea->name_len] = 0; + memcpy(buffer + ret, ea->name, name_len); + buffer[ret + name_len] = 0; } - ret += ea->name_len + 1; + ret += name_len + 1; + off += ea_size; } out: diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d620d4c53c6f..f0beb173dbba 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -294,13 +294,16 @@ out: * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. */ -static int ocfs2_check_dir_entry(struct inode * dir, - struct ocfs2_dir_entry * de, - struct buffer_head * bh, +static int ocfs2_check_dir_entry(struct inode *dir, + struct ocfs2_dir_entry *de, + struct buffer_head *bh, + char *buf, + unsigned int size, unsigned long offset) { const char *error_msg = NULL; const int rlen = le16_to_cpu(de->rec_len); + const unsigned long next_offset = ((char *) de - buf) + rlen; if (unlikely(rlen < OCFS2_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; @@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir, error_msg = "rec_len % 4 != 0"; else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; - else if (unlikely( - ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)) - error_msg = "directory entry across blocks"; + else if (unlikely(next_offset > size)) + error_msg = "directory entry overrun"; + else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) && + next_offset != size) + error_msg = "directory entry too close to end"; if (unlikely(error_msg != NULL)) mlog(ML_ERROR, "bad entry in directory #%llu: %s - " @@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh, de_buf = first_de; dlimit = de_buf + bytes; - while (de_buf < dlimit) { + while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ de = (struct ocfs2_dir_entry *) de_buf; - if (de_buf + namelen <= dlimit && + if (de->name + namelen <= dlimit && ocfs2_match(namelen, name, de)) { /* found a match - just to be sure, do a full check */ - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, + bytes, offset)) { ret = -1; goto bail; } @@ -1138,7 +1144,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, pde = NULL; de = (struct ocfs2_dir_entry *) first_de; while (i < bytes) { - if (!ocfs2_check_dir_entry(dir, de, bh, i)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) { status = -EIO; mlog_errno(status); goto bail; @@ -1635,7 +1641,8 @@ int __ocfs2_add_entry(handle_t *handle, /* These checks should've already been passed by the * prepare function, but I guess we can leave them * here anyway. */ - if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, + size, offset)) { retval = -ENOENT; goto bail; } @@ -1774,7 +1781,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, } de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); - if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) { + if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data, + i_size_read(inode), ctx->pos)) { /* On error, skip the f_pos to the end. */ ctx->pos = i_size_read(inode); break; @@ -1867,7 +1875,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (ctx->pos < i_size_read(inode) && offset < sb->s_blocksize) { de = (struct ocfs2_dir_entry *) (bh->b_data + offset); - if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { + if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data, + sb->s_blocksize, offset)) { /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; @@ -3339,7 +3348,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, struct super_block *sb = dir->i_sb; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_dir_entry *de, *last_de = NULL; - char *de_buf, *limit; + char *first_de, *de_buf, *limit; unsigned long offset = 0; unsigned int rec_len, new_rec_len, free_space; @@ -3352,14 +3361,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, else free_space = dir->i_sb->s_blocksize - i_size_read(dir); - de_buf = di->id2.i_data.id_data; + first_de = di->id2.i_data.id_data; + de_buf = first_de; limit = de_buf + i_size_read(dir); rec_len = OCFS2_DIR_REC_LEN(namelen); while (de_buf < limit) { de = (struct ocfs2_dir_entry *)de_buf; - if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, + i_size_read(dir), offset)) { ret = -ENOENT; goto out; } @@ -3441,7 +3452,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, /* move to next block */ de = (struct ocfs2_dir_entry *) bh->b_data; } - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, + offset)) { status = -ENOENT; goto bail; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index cb40cafbc062..da78a04d6f0b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops { */ #define LOCK_TYPE_USES_LVB 0x2 -static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .get_osb = ocfs2_get_inode_osb, .check_downconvert = ocfs2_check_meta_downconvert, .set_lvb = ocfs2_set_meta_lvb, @@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_super_lops = { +static const struct ocfs2_lock_res_ops ocfs2_super_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH, }; -static struct ocfs2_lock_res_ops ocfs2_rename_lops = { +static const struct ocfs2_lock_res_ops ocfs2_rename_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { +static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { +static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { +static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, .downconvert_worker = ocfs2_dentry_convert_worker, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_flock_lops = { +static const struct ocfs2_lock_res_ops ocfs2_flock_lops = { .get_osb = ocfs2_get_file_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { +static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { .set_lvb = ocfs2_set_qinfo_lvb, .get_osb = ocfs2_get_qinfo_osb, .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { +static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { .check_downconvert = ocfs2_check_refcount_downconvert, .downconvert_worker = ocfs2_refcount_convert_worker, .flags = 0, @@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, - struct ocfs2_lock_res_ops *ops, + const struct ocfs2_lock_res_ops *ops, void *priv) { res->l_type = type; @@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, unsigned int generation, struct inode *inode) { - struct ocfs2_lock_res_ops *ops; + const struct ocfs2_lock_res_ops *ops; switch(type) { case OCFS2_LOCK_TYPE_RW: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4d1ea8703fcd..59c92353151a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2189,8 +2189,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, * @osb: ocfs2 file system * @ret_orphan_dir: Orphan dir inode - returned locked! * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @name: Buffer to store the name of the orphan. * @lookup: dir lookup result, to be passed back into functions like * ocfs2_orphan_add + * @dio: Flag indicating if direct IO is being used or not. * * Returns zero on success and the ret_orphan_dir, name and lookup * fields will be populated. diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fe826143d7b..51c52768132d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -154,7 +154,7 @@ struct ocfs2_lock_stats { struct ocfs2_lock_res { void *l_priv; - struct ocfs2_lock_res_ops *l_ops; + const struct ocfs2_lock_res_ops *l_ops; struct list_head l_blocked_list; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index c973c03f6fd8..10157d9d7a9c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations o2cb_stack_ops = { +static const struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .this_node = o2cb_cluster_this_node, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c11406cd87a8..77edcd70f72c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { +static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 3636847fae19..02ab072c528a 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -223,7 +223,7 @@ struct ocfs2_stack_operations { */ struct ocfs2_stack_plugin { char *sp_name; - struct ocfs2_stack_operations *sp_ops; + const struct ocfs2_stack_operations *sp_ops; struct module *sp_owner; /* These are managed by the stackglue code. */ diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3b81213ed7b8..35c0cc2a51af 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry, return i_ret + b_ret; } -static int ocfs2_xattr_find_entry(int name_index, +static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { struct ocfs2_xattr_entry *entry; size_t name_len; - int i, cmp = 1; + int i, name_offset, cmp = 1; if (name == NULL) return -EINVAL; @@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index, name_len = strlen(name); entry = xs->here; for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { + if ((void *)entry >= xs->end) { + ocfs2_error(inode->i_sb, "corrupted xattr entries"); + return -EFSCORRUPTED; + } cmp = name_index - ocfs2_xattr_get_type(entry); if (!cmp) cmp = name_len - entry->xe_name_len; - if (!cmp) - cmp = memcmp(name, (xs->base + - le16_to_cpu(entry->xe_name_offset)), - name_len); + if (!cmp) { + name_offset = le16_to_cpu(entry->xe_name_offset); + if ((xs->base + name_offset + name_len) > xs->end) { + ocfs2_error(inode->i_sb, + "corrupted xattr entries"); + return -EFSCORRUPTED; + } + cmp = memcmp(name, (xs->base + name_offset), name_len); + } if (cmp == 0) break; entry += 1; @@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode, xs->base = (void *)xs->header; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret) return ret; size = le64_to_cpu(xs->here->xe_value_size); @@ -2698,7 +2707,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode, /* Find the named attribute. */ if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret && ret != -ENODATA) return ret; xs->not_found = ret; @@ -2833,7 +2842,7 @@ static int ocfs2_xattr_block_find(struct inode *inode, xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); } else ret = ocfs2_xattr_index_block_find(inode, blk_bh, name_index, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a71ac5379584..a8a8576d8592 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -13,6 +13,7 @@ #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> +#include <linux/mm.h> struct ctl_table_header; struct mempolicy; @@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr); /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 +/** + * folio_precise_page_mapcount() - Number of mappings of this folio page. + * @folio: The folio. + * @page: The page. + * + * The number of present user page table entries that reference this page + * as tracked via the RMAP: either referenced directly (PTE) or as part of + * a larger area that covers this page (e.g., PMD). + * + * Use this function only for the calculation of existing statistics + * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount). + * + * Do not add new users. + * + * Returns: The number of mappings of this folio page. 0 for + * folios that are not mapped to user space or are not tracked via the RMAP + * (e.g., shared zeropage). + */ +static inline int folio_precise_page_mapcount(struct folio *folio, + struct page *page) +{ + int mapcount = atomic_read(&page->_mapcount) + 1; + + /* Handle page_has_type() pages */ + if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + mapcount = 0; + if (folio_test_large(folio)) + mapcount += folio_entire_mapcount(folio); + + return mapcount; +} + /* * array.c */ diff --git a/fs/proc/page.c b/fs/proc/page.c index 2fb64bdb64eb..b7a5c84b5819 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void) #endif } -/* /proc/kpagecount - an array exposing page counts +/* /proc/kpagecount - an array exposing page mapcounts * * Each entry is a u64 representing the corresponding - * physical page count. + * physical page mapcount. */ static ssize_t kpagecount_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; - struct page *ppage; unsigned long src = *ppos; unsigned long pfn; ssize_t ret = 0; - u64 pcount; pfn = src / KPMSIZE; if (src & KPMMASK || count & KPMMASK) @@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { + struct page *page; + u64 mapcount = 0; + /* * TODO: ZONE_DEVICE support requires to identify * memmaps that were actually initialized. */ - ppage = pfn_to_online_page(pfn); - - if (!ppage) - pcount = 0; - else - pcount = page_mapcount(ppage); + page = pfn_to_online_page(pfn); + if (page) + mapcount = folio_precise_page_mapcount(page_folio(page), + page); - if (put_user(pcount, out)) { + if (put_user(mapcount, out)) { ret = -EFAULT; break; } @@ -148,19 +147,16 @@ u64 stable_page_flags(const struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (folio_test_hugetlb(folio)) u |= 1 << KPF_HUGE; - /* - * We need to check PageLRU/PageAnon - * to make sure a given page is a thp, not a non-huge compound page. - */ - else if (folio_test_large(folio)) { - if ((k & (1 << PG_lru)) || is_anon) - u |= 1 << KPF_THP; - else if (is_huge_zero_folio(folio)) { - u |= 1 << KPF_ZERO_PAGE; - u |= 1 << KPF_THP; - } - } else if (is_zero_pfn(page_to_pfn(page))) + else if (folio_test_large(folio) && + folio_test_large_rmappable(folio)) { + /* Note: we indicate any THPs here, not just PMD-sized ones */ + u |= 1 << KPF_THP; + } else if (is_huge_zero_folio(folio)) { u |= 1 << KPF_ZERO_PAGE; + u |= 1 << KPF_THP; + } else if (is_zero_folio(folio)) { + u |= 1 << KPF_ZERO_PAGE; + } /* * Caveats on high order pages: PG_buddy and PG_slab will only be set diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 71e5039d940d..775a2e8d600c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -22,6 +22,7 @@ #include <linux/pkeys.h> #include <linux/minmax.h> #include <linux/overflow.h> +#include <linux/buildid.h> #include <asm/elf.h> #include <asm/tlb.h> @@ -239,6 +240,67 @@ static int do_maps_open(struct inode *inode, struct file *file, sizeof(struct proc_maps_private)); } +static void get_vma_name(struct vm_area_struct *vma, + const struct path **path, + const char **name, + const char **name_fmt) +{ + struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL; + + *name = NULL; + *path = NULL; + *name_fmt = NULL; + + /* + * Print the dentry name for named mappings, and a + * special [heap] marker for the heap: + */ + if (vma->vm_file) { + /* + * If user named this anon shared memory via + * prctl(PR_SET_VMA ..., use the provided name. + */ + if (anon_name) { + *name_fmt = "[anon_shmem:%s]"; + *name = anon_name->name; + } else { + *path = file_user_path(vma->vm_file); + } + return; + } + + if (vma->vm_ops && vma->vm_ops->name) { + *name = vma->vm_ops->name(vma); + if (*name) + return; + } + + *name = arch_vma_name(vma); + if (*name) + return; + + if (!vma->vm_mm) { + *name = "[vdso]"; + return; + } + + if (vma_is_initial_heap(vma)) { + *name = "[heap]"; + return; + } + + if (vma_is_initial_stack(vma)) { + *name = "[stack]"; + return; + } + + if (anon_name) { + *name_fmt = "[anon:%s]"; + *name = anon_name->name; + return; + } +} + static void show_vma_header_prefix(struct seq_file *m, unsigned long start, unsigned long end, vm_flags_t flags, unsigned long long pgoff, @@ -262,17 +324,15 @@ static void show_vma_header_prefix(struct seq_file *m, static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { - struct anon_vma_name *anon_name = NULL; - struct mm_struct *mm = vma->vm_mm; - struct file *file = vma->vm_file; + const struct path *path; + const char *name_fmt, *name; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - const char *name = NULL; - if (file) { + if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; @@ -283,57 +343,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) start = vma->vm_start; end = vma->vm_end; show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); - if (mm) - anon_name = anon_vma_name(vma); - /* - * Print the dentry name for named mappings, and a - * special [heap] marker for the heap: - */ - if (file) { + get_vma_name(vma, &path, &name, &name_fmt); + if (path) { seq_pad(m, ' '); - /* - * If user named this anon shared memory via - * prctl(PR_SET_VMA ..., use the provided name. - */ - if (anon_name) - seq_printf(m, "[anon_shmem:%s]", anon_name->name); - else - seq_path(m, file_user_path(file), "\n"); - goto done; - } - - if (vma->vm_ops && vma->vm_ops->name) { - name = vma->vm_ops->name(vma); - if (name) - goto done; - } - - name = arch_vma_name(vma); - if (!name) { - if (!mm) { - name = "[vdso]"; - goto done; - } - - if (vma_is_initial_heap(vma)) { - name = "[heap]"; - goto done; - } - - if (vma_is_initial_stack(vma)) { - name = "[stack]"; - goto done; - } - - if (anon_name) { - seq_pad(m, ' '); - seq_printf(m, "[anon:%s]", anon_name->name); - } - } - -done: - if (name) { + seq_path(m, path, "\n"); + } else if (name_fmt) { + seq_pad(m, ' '); + seq_printf(m, name_fmt, name); + } else if (name) { seq_pad(m, ' '); seq_puts(m, name); } @@ -358,11 +376,268 @@ static int pid_maps_open(struct inode *inode, struct file *file) return do_maps_open(inode, file, &proc_pid_maps_op); } +#define PROCMAP_QUERY_VMA_FLAGS ( \ + PROCMAP_QUERY_VMA_READABLE | \ + PROCMAP_QUERY_VMA_WRITABLE | \ + PROCMAP_QUERY_VMA_EXECUTABLE | \ + PROCMAP_QUERY_VMA_SHARED \ +) + +#define PROCMAP_QUERY_VALID_FLAGS_MASK ( \ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \ + PROCMAP_QUERY_FILE_BACKED_VMA | \ + PROCMAP_QUERY_VMA_FLAGS \ +) + +static int query_vma_setup(struct mm_struct *mm) +{ + return mmap_read_lock_killable(mm); +} + +static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma) +{ + mmap_read_unlock(mm); +} + +static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr) +{ + return find_vma(mm, addr); +} + +static struct vm_area_struct *query_matching_vma(struct mm_struct *mm, + unsigned long addr, u32 flags) +{ + struct vm_area_struct *vma; + +next_vma: + vma = query_vma_find_by_addr(mm, addr); + if (!vma) + goto no_vma; + + /* user requested only file-backed VMA, keep iterating */ + if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file) + goto skip_vma; + + /* VMA permissions should satisfy query flags */ + if (flags & PROCMAP_QUERY_VMA_FLAGS) { + u32 perm = 0; + + if (flags & PROCMAP_QUERY_VMA_READABLE) + perm |= VM_READ; + if (flags & PROCMAP_QUERY_VMA_WRITABLE) + perm |= VM_WRITE; + if (flags & PROCMAP_QUERY_VMA_EXECUTABLE) + perm |= VM_EXEC; + if (flags & PROCMAP_QUERY_VMA_SHARED) + perm |= VM_MAYSHARE; + + if ((vma->vm_flags & perm) != perm) + goto skip_vma; + } + + /* found covering VMA or user is OK with the matching next VMA */ + if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr) + return vma; + +skip_vma: + /* + * If the user needs closest matching VMA, keep iterating. + */ + addr = vma->vm_end; + if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) + goto next_vma; + +no_vma: + return ERR_PTR(-ENOENT); +} + +static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) +{ + struct procmap_query karg; + struct vm_area_struct *vma; + struct mm_struct *mm; + const char *name = NULL; + char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL; + __u64 usize; + int err; + + if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize))) + return -EFAULT; + /* argument struct can never be that large, reject abuse */ + if (usize > PAGE_SIZE) + return -E2BIG; + /* argument struct should have at least query_flags and query_addr fields */ + if (usize < offsetofend(struct procmap_query, query_addr)) + return -EINVAL; + err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); + if (err) + return err; + + /* reject unknown flags */ + if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK) + return -EINVAL; + /* either both buffer address and size are set, or both should be zero */ + if (!!karg.vma_name_size != !!karg.vma_name_addr) + return -EINVAL; + if (!!karg.build_id_size != !!karg.build_id_addr) + return -EINVAL; + + mm = priv->mm; + if (!mm || !mmget_not_zero(mm)) + return -ESRCH; + + err = query_vma_setup(mm); + if (err) { + mmput(mm); + return err; + } + + vma = query_matching_vma(mm, karg.query_addr, karg.query_flags); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + vma = NULL; + goto out; + } + + karg.vma_start = vma->vm_start; + karg.vma_end = vma->vm_end; + + karg.vma_flags = 0; + if (vma->vm_flags & VM_READ) + karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE; + if (vma->vm_flags & VM_WRITE) + karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE; + if (vma->vm_flags & VM_EXEC) + karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE; + if (vma->vm_flags & VM_MAYSHARE) + karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED; + + karg.vma_page_size = vma_kernel_pagesize(vma); + + if (vma->vm_file) { + const struct inode *inode = file_user_inode(vma->vm_file); + + karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT; + karg.dev_major = MAJOR(inode->i_sb->s_dev); + karg.dev_minor = MINOR(inode->i_sb->s_dev); + karg.inode = inode->i_ino; + } else { + karg.vma_offset = 0; + karg.dev_major = 0; + karg.dev_minor = 0; + karg.inode = 0; + } + + if (karg.build_id_size) { + __u32 build_id_sz; + + err = build_id_parse(vma, build_id_buf, &build_id_sz); + if (err) { + karg.build_id_size = 0; + } else { + if (karg.build_id_size < build_id_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.build_id_size = build_id_sz; + } + } + + if (karg.build_id_size) { + __u32 build_id_sz; + + err = build_id_parse(vma, build_id_buf, &build_id_sz); + if (err) { + karg.build_id_size = 0; + } else { + if (karg.build_id_size < build_id_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.build_id_size = build_id_sz; + } + } + + if (karg.vma_name_size) { + size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size); + const struct path *path; + const char *name_fmt; + size_t name_sz = 0; + + get_vma_name(vma, &path, &name, &name_fmt); + + if (path || name_fmt || name) { + name_buf = kmalloc(name_buf_sz, GFP_KERNEL); + if (!name_buf) { + err = -ENOMEM; + goto out; + } + } + if (path) { + name = d_path(path, name_buf, name_buf_sz); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + name_sz = name_buf + name_buf_sz - name; + } else if (name || name_fmt) { + name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name); + name = name_buf; + } + if (name_sz > name_buf_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.vma_name_size = name_sz; + } + + /* unlock vma or mmap_lock, and put mm_struct before copying data to user */ + query_vma_teardown(mm, vma); + mmput(mm); + + if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr), + name, karg.vma_name_size)) { + kfree(name_buf); + return -EFAULT; + } + kfree(name_buf); + + if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr), + build_id_buf, karg.build_id_size)) + return -EFAULT; + + if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize))) + return -EFAULT; + + return 0; + +out: + query_vma_teardown(mm, vma); + mmput(mm); + kfree(name_buf); + return err; +} + +static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + switch (cmd) { + case PROCMAP_QUERY: + return do_procmap_query(priv, (void __user *)arg); + default: + return -ENOIOCTLCMD; + } +} + const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, + .unlocked_ioctl = procfs_procmap_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; /* @@ -442,7 +717,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked, - bool migration) + bool present) { struct folio *folio = page_folio(page); int i, nr = compound ? compound_nr(page) : 1; @@ -471,24 +746,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * Then accumulate quantities that may depend on sharing, or that may * differ page-by-page. * - * refcount == 1 guarantees the page is mapped exactly once. - * If any subpage of the compound page mapped with PTE it would elevate - * the refcount. + * refcount == 1 for present entries guarantees that the folio is mapped + * exactly once. For large folios this implies that exactly one + * PTE/PMD/... maps (a part of) this folio. * - * The page_mapcount() is called to get a snapshot of the mapcount. - * Without holding the page lock this snapshot can be slightly wrong as - * we cannot always read the mapcount atomically. It is not safe to - * call page_mapcount() even with PTL held if the page is not mapped, - * especially for migration entries. Treat regular migration entries - * as mapcount == 1. + * Treat all non-present entries (where relying on the mapcount and + * refcount doesn't make sense) as "maybe shared, but not sure how + * often". We treat device private entries as being fake-present. + * + * Note that it would not be safe to read the mapcount especially for + * pages referenced by migration entries, even with the PTL held. */ - if ((folio_ref_count(folio) == 1) || migration) { + if (folio_ref_count(folio) == 1 || !present) { smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT, - dirty, locked, true); + dirty, locked, present); return; } + /* + * We obtain a snapshot of the mapcount. Without holding the folio lock + * this snapshot can be slightly wrong as we cannot always read the + * mapcount atomically. + */ for (i = 0; i < nr; i++, page++) { - int mapcount = page_mapcount(page); + int mapcount = folio_precise_page_mapcount(folio, page); unsigned long pss = PAGE_SIZE << PSS_SHIFT; if (mapcount >= 2) pss /= mapcount; @@ -531,13 +811,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; - bool migration = false, young = false, dirty = false; + bool present = false, young = false, dirty = false; pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { page = vm_normal_page(vma, addr, ptent); young = pte_young(ptent); dirty = pte_dirty(ptent); + present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); @@ -555,8 +836,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } } else if (is_pfn_swap_entry(swpent)) { - if (is_migration_entry(swpent)) - migration = true; + if (is_device_private_entry(swpent)) + present = true; page = pfn_swap_entry_to_page(swpent); } } else { @@ -567,7 +848,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, young, dirty, locked, migration); + smaps_account(mss, page, false, young, dirty, locked, present); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -578,18 +859,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool present = false; struct folio *folio; - bool migration = false; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); + present = true; } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) { - migration = true; + if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); - } } if (IS_ERR_OR_NULL(page)) return; @@ -604,7 +884,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, mss->file_thp += HPAGE_PMD_SIZE; smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), - locked, migration); + locked, present); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -733,19 +1013,23 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(pte); + pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; + bool present = false; if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); + present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (is_pfn_swap_entry(swpent)) folio = pfn_swap_entry_folio(swpent); } + if (folio) { - if (folio_likely_mapped_shared(folio) || + /* We treat non-present entries as "maybe shared". */ + if (!present || folio_likely_mapped_shared(folio) || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else @@ -1091,7 +1375,7 @@ struct clear_refs_private { static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - struct page *page; + struct folio *folio; if (!pte_write(pte)) return false; @@ -1099,10 +1383,10 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, return false; if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))) return false; - page = vm_normal_page(vma, addr, pte); - if (!page) + folio = vm_normal_folio(vma, addr, pte); + if (!folio) return false; - return page_maybe_dma_pinned(page); + return folio_maybe_dma_pinned(folio); } static inline void clear_soft_dirty(struct vm_area_struct *vma, @@ -1418,7 +1702,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; - bool migration = false; + struct folio *folio; if (pte_present(pte)) { if (pm->show_pfn) @@ -1450,17 +1734,20 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; - migration = is_migration_entry(entry); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; } - if (page && !PageAnon(page)) - flags |= PM_FILE; - if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + if ((flags & PM_PRESENT) && + folio_precise_page_mapcount(folio, page) == 1) + flags |= PM_MMAP_EXCLUSIVE; + } if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1476,13 +1763,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, pte_t *pte, *orig_pte; int err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - bool migration = false; ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { + unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; struct page *page = NULL; + struct folio *folio = NULL; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1496,8 +1784,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) - frame = pmd_pfn(pmd) + - ((addr & ~PMD_MASK) >> PAGE_SHIFT); + frame = pmd_pfn(pmd) + idx; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { @@ -1506,11 +1793,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pm->show_pfn) { if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry); + offset = swp_offset_pfn(entry) + idx; else - offset = swp_offset(entry); - offset = offset + - ((addr & ~PMD_MASK) >> PAGE_SHIFT); + offset = swp_offset(entry) + idx; frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } @@ -1520,17 +1805,25 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); - migration = is_migration_entry(entry); page = pfn_swap_entry_to_page(entry); } #endif - if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + } + + for (; addr != end; addr += PAGE_SIZE, idx++) { + unsigned long cur_flags = flags; + pagemap_entry_t pme; - for (; addr != end; addr += PAGE_SIZE) { - pagemap_entry_t pme = make_pme(frame, flags); + if (folio && (flags & PM_PRESENT) && + folio_precise_page_mapcount(folio, page + idx) == 1) + cur_flags |= PM_MMAP_EXCLUSIVE; + pme = make_pme(frame, cur_flags); err = add_to_pagemap(&pme, pm); if (err) break; @@ -1585,7 +1878,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2274,7 +2567,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, if (~p->arg.flags & PM_SCAN_WP_MATCHING) { /* Go the short route when not write-protecting pages. */ - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2286,7 +2579,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, i_mmap_lock_write(vma->vm_file->f_mapping); ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep); - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2566,7 +2859,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, unsigned long nr_pages) { struct folio *folio = page_folio(page); - int count = page_mapcount(page); + int count = folio_precise_page_mapcount(folio, page); md->pages += nr_pages; if (pte_dirty || folio_test_dirty(folio)) @@ -2682,7 +2975,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(pte); + pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); struct numa_maps *md; struct page *page; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 6397fdefd876..c92937bed133 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1359,7 +1359,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, target_tcon = tlink_tcon(smb_file_target->tlink); if (src_tcon->ses != target_tcon->ses) { - cifs_dbg(VFS, "source and target of copy not on same server\n"); + cifs_dbg(FYI, "source and target of copy not on same server\n"); goto out; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a865941724c0..8e86fec7dcd2 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -290,7 +290,7 @@ struct smb_version_operations { int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); void (*add_credits)(struct TCP_Server_Info *server, - const struct cifs_credits *credits, + struct cifs_credits *credits, const int optype); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *, const int); @@ -550,8 +550,8 @@ struct smb_version_operations { size_t *, struct cifs_credits *); /* adjust previously taken mtu credits to request size */ int (*adjust_credits)(struct TCP_Server_Info *server, - struct cifs_credits *credits, - const unsigned int payload_size); + struct cifs_io_subrequest *subreq, + unsigned int /*enum smb3_rw_credits_trace*/ trace); /* check if we need to issue closedir */ bool (*dir_needs_close)(struct cifsFileInfo *); long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, @@ -848,6 +848,9 @@ static inline void cifs_server_unlock(struct TCP_Server_Info *server) struct cifs_credits { unsigned int value; unsigned int instance; + unsigned int in_flight_check; + unsigned int rreq_debug_id; + unsigned int rreq_debug_index; }; static inline unsigned int @@ -873,7 +876,7 @@ has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) } static inline void -add_credits(struct TCP_Server_Info *server, const struct cifs_credits *credits, +add_credits(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype) { server->ops->add_credits(server, credits, optype); @@ -897,11 +900,11 @@ set_credits(struct TCP_Server_Info *server, const int val) } static inline int -adjust_credits(struct TCP_Server_Info *server, struct cifs_credits *credits, - const unsigned int payload_size) +adjust_credits(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, + unsigned int /* enum smb3_rw_credits_trace */ trace) { return server->ops->adjust_credits ? - server->ops->adjust_credits(server, credits, payload_size) : 0; + server->ops->adjust_credits(server, subreq, trace) : 0; } static inline __le64 diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 1374635e89fa..b2405dd4d4d4 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -80,6 +80,16 @@ retry: return netfs_prepare_write_failed(subreq); } + wdata->credits.rreq_debug_id = subreq->rreq->debug_id; + wdata->credits.rreq_debug_index = subreq->debug_index; + wdata->credits.in_flight_check = 1; + trace_smb3_rw_credits(wdata->rreq->debug_id, + wdata->subreq.debug_index, + wdata->credits.value, + server->credits, server->in_flight, + wdata->credits.value, + cifs_trace_rw_credits_write_prepare); + #ifdef CONFIG_CIFS_SMB_DIRECT if (server->smbd_conn) subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; @@ -101,7 +111,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) goto fail; } - rc = adjust_credits(wdata->server, &wdata->credits, wdata->subreq.len); + rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust); if (rc) goto fail; @@ -123,6 +133,11 @@ fail: goto out; } +static void cifs_netfs_invalidate_cache(struct netfs_io_request *wreq) +{ + cifs_invalidate_cache(wreq->inode, 0); +} + /* * Split the read up according to how many credits we can get for each piece. * It's okay to sleep here if we need to wait for more credit to become @@ -158,7 +173,18 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) return false; } + rdata->credits.in_flight_check = 1; + rdata->credits.rreq_debug_id = rreq->debug_id; + rdata->credits.rreq_debug_index = subreq->debug_index; + + trace_smb3_rw_credits(rdata->rreq->debug_id, + rdata->subreq.debug_index, + rdata->credits.value, + server->credits, server->in_flight, 0, + cifs_trace_rw_credits_read_submit); + subreq->len = min_t(size_t, subreq->len, rsize); + #ifdef CONFIG_CIFS_SMB_DIRECT if (server->smbd_conn) subreq->max_nr_segs = server->smbd_conn->max_frmr_depth; @@ -289,6 +315,15 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } + if (rdata->credits.value != 0) + trace_smb3_rw_credits(rdata->rreq->debug_id, + rdata->subreq.debug_index, + rdata->credits.value, + rdata->server ? rdata->server->credits : 0, + rdata->server ? rdata->server->in_flight : 0, + -rdata->credits.value, + cifs_trace_rw_credits_free_subreq); + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); @@ -307,6 +342,7 @@ const struct netfs_request_ops cifs_req_ops = { .begin_writeback = cifs_begin_writeback, .prepare_write = cifs_prepare_write, .issue_write = cifs_issue_write, + .invalidate_cache = cifs_netfs_invalidate_cache, }; /* @@ -2358,13 +2394,18 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t bool was_async) { struct netfs_io_request *wreq = wdata->rreq; - loff_t new_server_eof; + struct netfs_inode *ictx = netfs_inode(wreq->inode); + loff_t wrend; if (result > 0) { - new_server_eof = wdata->subreq.start + wdata->subreq.transferred + result; + wrend = wdata->subreq.start + wdata->subreq.transferred + result; - if (new_server_eof > netfs_inode(wreq->inode)->remote_i_size) - netfs_resize_file(netfs_inode(wreq->inode), new_server_eof, true); + if (wrend > ictx->zero_point && + (wdata->rreq->origin == NETFS_UNBUFFERED_WRITE || + wdata->rreq->origin == NETFS_DIO_WRITE)) + ictx->zero_point = wrend; + if (wrend > ictx->remote_i_size) + netfs_resize_file(ictx, wrend, true); } netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); @@ -2877,6 +2918,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) rc = netfs_start_io_direct(inode); if (rc < 0) goto out; + rc = -EACCES; down_read(&cinode->lock_sem); if (!cifs_find_lock_conflict( cfile, iocb->ki_pos, iov_iter_count(to), @@ -2889,6 +2931,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) rc = netfs_start_io_read(inode); if (rc < 0) goto out; + rc = -EACCES; down_read(&cinode->lock_sem); if (!cifs_find_lock_conflict( cfile, iocb->ki_pos, iov_iter_count(to), diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 212ec6f66ec6..e1f2feb56f45 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -108,7 +108,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) static void cifs_add_credits(struct TCP_Server_Info *server, - const struct cifs_credits *credits, const int optype) + struct cifs_credits *credits, const int optype) { spin_lock(&server->req_lock); server->credits += credits->value; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index c8e536540895..7fe59235f090 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -66,7 +66,7 @@ change_conf(struct TCP_Server_Info *server) static void smb2_add_credits(struct TCP_Server_Info *server, - const struct cifs_credits *credits, const int optype) + struct cifs_credits *credits, const int optype) { int *val, rc = -1; int scredits, in_flight; @@ -94,7 +94,21 @@ smb2_add_credits(struct TCP_Server_Info *server, server->conn_id, server->hostname, *val, add, server->in_flight); } - WARN_ON_ONCE(server->in_flight == 0); + if (credits->in_flight_check > 1) { + pr_warn_once("rreq R=%08x[%x] Credits not in flight\n", + credits->rreq_debug_id, credits->rreq_debug_index); + } else { + credits->in_flight_check = 2; + } + if (WARN_ON_ONCE(server->in_flight == 0)) { + pr_warn_once("rreq R=%08x[%x] Zero in_flight\n", + credits->rreq_debug_id, credits->rreq_debug_index); + trace_smb3_rw_credits(credits->rreq_debug_id, + credits->rreq_debug_index, + credits->value, + server->credits, server->in_flight, 0, + cifs_trace_rw_credits_zero_in_flight); + } server->in_flight--; if (server->in_flight == 0 && ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) && @@ -283,16 +297,23 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, static int smb2_adjust_credits(struct TCP_Server_Info *server, - struct cifs_credits *credits, - const unsigned int payload_size) + struct cifs_io_subrequest *subreq, + unsigned int /*enum smb3_rw_credits_trace*/ trace) { - int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE); + struct cifs_credits *credits = &subreq->credits; + int new_val = DIV_ROUND_UP(subreq->subreq.len, SMB2_MAX_BUFFER_SIZE); int scredits, in_flight; if (!credits->value || credits->value == new_val) return 0; if (credits->value < new_val) { + trace_smb3_rw_credits(subreq->rreq->debug_id, + subreq->subreq.debug_index, + credits->value, + server->credits, server->in_flight, + new_val - credits->value, + cifs_trace_rw_credits_no_adjust_up); trace_smb3_too_many_credits(server->CurrentMid, server->conn_id, server->hostname, 0, credits->value - new_val, 0); cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)", @@ -308,6 +329,12 @@ smb2_adjust_credits(struct TCP_Server_Info *server, in_flight = server->in_flight; spin_unlock(&server->req_lock); + trace_smb3_rw_credits(subreq->rreq->debug_id, + subreq->subreq.debug_index, + credits->value, + server->credits, server->in_flight, + new_val - credits->value, + cifs_trace_rw_credits_old_session); trace_smb3_reconnect_detected(server->CurrentMid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); @@ -316,6 +343,11 @@ smb2_adjust_credits(struct TCP_Server_Info *server, return -EAGAIN; } + trace_smb3_rw_credits(subreq->rreq->debug_id, + subreq->subreq.debug_index, + credits->value, + server->credits, server->in_flight, + new_val - credits->value, trace); server->credits += credits->value - new_val; scredits = server->credits; in_flight = server->in_flight; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2ae2dbb6202b..9fc5b11c0b6c 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4502,8 +4502,15 @@ smb2_readv_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = rdata->server; struct smb2_hdr *shdr = (struct smb2_hdr *)rdata->iov[0].iov_base; - struct cifs_credits credits = { .value = 0, .instance = 0 }; + struct cifs_credits credits = { + .value = 0, + .instance = 0, + .rreq_debug_id = rdata->rreq->debug_id, + .rreq_debug_index = rdata->subreq.debug_index, + }; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1 }; + unsigned int rreq_debug_id = rdata->rreq->debug_id; + unsigned int subreq_debug_index = rdata->subreq.debug_index; if (rdata->got_bytes) { rqst.rq_iter = rdata->subreq.io_iter; @@ -4587,10 +4594,16 @@ smb2_readv_callback(struct mid_q_entry *mid) if (rdata->subreq.start < rdata->subreq.rreq->i_size) rdata->result = 0; } + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, + server->credits, server->in_flight, + 0, cifs_trace_rw_credits_read_response_clear); rdata->credits.value = 0; INIT_WORK(&rdata->subreq.work, smb2_readv_worker); queue_work(cifsiod_wq, &rdata->subreq.work); release_mid(mid); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, + server->credits, server->in_flight, + credits.value, cifs_trace_rw_credits_read_response_add); add_credits(server, &credits, 0); } @@ -4647,7 +4660,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) min_t(int, server->max_credits - server->credits, credit_request)); - rc = adjust_credits(server, &rdata->credits, rdata->subreq.len); + rc = adjust_credits(server, rdata, cifs_trace_rw_credits_call_readv_adjust); if (rc) goto async_readv_out; @@ -4766,7 +4779,14 @@ smb2_writev_callback(struct mid_q_entry *mid) struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink); struct TCP_Server_Info *server = wdata->server; struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; - struct cifs_credits credits = { .value = 0, .instance = 0 }; + struct cifs_credits credits = { + .value = 0, + .instance = 0, + .rreq_debug_id = wdata->rreq->debug_id, + .rreq_debug_index = wdata->subreq.debug_index, + }; + unsigned int rreq_debug_id = wdata->rreq->debug_id; + unsigned int subreq_debug_index = wdata->subreq.debug_index; ssize_t result = 0; size_t written; @@ -4837,9 +4857,15 @@ smb2_writev_callback(struct mid_q_entry *mid) tcon->tid, tcon->ses->Suid, wdata->subreq.start, wdata->subreq.len); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, wdata->credits.value, + server->credits, server->in_flight, + 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; cifs_write_subrequest_terminated(wdata, result ?: written, true); release_mid(mid); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, + server->credits, server->in_flight, + credits.value, cifs_trace_rw_credits_write_response_add); add_credits(server, &credits, 0); } @@ -4859,9 +4885,6 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) struct cifs_io_parms *io_parms = NULL; int credit_request; - if (!wdata->server || test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags)) - server = wdata->server = cifs_pick_channel(tcon->ses); - /* * in future we may get cifs_io_parms passed in from the caller, * but for now we construct it here... @@ -4972,7 +4995,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) min_t(int, server->max_credits - server->credits, credit_request)); - rc = adjust_credits(server, &wdata->credits, io_parms->length); + rc = adjust_credits(server, wdata, cifs_trace_rw_credits_call_writev_adjust); if (rc) goto async_writev_out; @@ -4997,6 +5020,12 @@ async_writev_out: cifs_small_buf_release(req); out: if (rc) { + trace_smb3_rw_credits(wdata->rreq->debug_id, + wdata->subreq.debug_index, + wdata->credits.value, + server->credits, server->in_flight, + -(int)wdata->credits.value, + cifs_trace_rw_credits_write_response_clear); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); cifs_write_subrequest_terminated(wdata, rc, true); } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 36d47ce59631..36d5295c2a6f 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -20,6 +20,22 @@ /* * Specify enums for tracing information. */ +#define smb3_rw_credits_traces \ + EM(cifs_trace_rw_credits_call_readv_adjust, "rd-call-adj") \ + EM(cifs_trace_rw_credits_call_writev_adjust, "wr-call-adj") \ + EM(cifs_trace_rw_credits_free_subreq, "free-subreq") \ + EM(cifs_trace_rw_credits_issue_read_adjust, "rd-issu-adj") \ + EM(cifs_trace_rw_credits_issue_write_adjust, "wr-issu-adj") \ + EM(cifs_trace_rw_credits_no_adjust_up, "no-adj-up ") \ + EM(cifs_trace_rw_credits_old_session, "old-session") \ + EM(cifs_trace_rw_credits_read_response_add, "rd-resp-add") \ + EM(cifs_trace_rw_credits_read_response_clear, "rd-resp-clr") \ + EM(cifs_trace_rw_credits_read_submit, "rd-submit ") \ + EM(cifs_trace_rw_credits_write_prepare, "wr-prepare ") \ + EM(cifs_trace_rw_credits_write_response_add, "wr-resp-add") \ + EM(cifs_trace_rw_credits_write_response_clear, "wr-resp-clr") \ + E_(cifs_trace_rw_credits_zero_in_flight, "ZERO-IN-FLT") + #define smb3_tcon_ref_traces \ EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \ EM(netfs_trace_tcon_ref_free, "FRE ") \ @@ -59,7 +75,8 @@ #define EM(a, b) a, #define E_(a, b) a -enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); +enum smb3_rw_credits_trace { smb3_rw_credits_traces } __mode(byte); +enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); #undef EM #undef E_ @@ -71,6 +88,7 @@ enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +smb3_rw_credits_traces; smb3_tcon_ref_traces; #undef EM @@ -1316,6 +1334,41 @@ TRACE_EVENT(smb3_tcon_ref, __entry->ref) ); +TRACE_EVENT(smb3_rw_credits, + TP_PROTO(unsigned int rreq_debug_id, + unsigned int subreq_debug_index, + unsigned int subreq_credits, + unsigned int server_credits, + int server_in_flight, + int credit_change, + enum smb3_rw_credits_trace trace), + TP_ARGS(rreq_debug_id, subreq_debug_index, subreq_credits, + server_credits, server_in_flight, credit_change, trace), + TP_STRUCT__entry( + __field(unsigned int, rreq_debug_id) + __field(unsigned int, subreq_debug_index) + __field(unsigned int, subreq_credits) + __field(unsigned int, server_credits) + __field(int, in_flight) + __field(int, credit_change) + __field(enum smb3_rw_credits_trace, trace) + ), + TP_fast_assign( + __entry->rreq_debug_id = rreq_debug_id; + __entry->subreq_debug_index = subreq_debug_index; + __entry->subreq_credits = subreq_credits; + __entry->server_credits = server_credits; + __entry->in_flight = server_in_flight; + __entry->credit_change = credit_change; + __entry->trace = trace; + ), + TP_printk("R=%08x[%x] %s cred=%u chg=%d pool=%u ifl=%d", + __entry->rreq_debug_id, __entry->subreq_debug_index, + __print_symbolic(__entry->trace, smb3_rw_credits_traces), + __entry->subreq_credits, __entry->credit_change, + __entry->server_credits, __entry->in_flight) + ); + #undef EM #undef E_ diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 012b9bd06995..adfe0d058701 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -988,10 +988,10 @@ static void cifs_compound_callback(struct mid_q_entry *mid) { struct TCP_Server_Info *server = mid->server; - struct cifs_credits credits; - - credits.value = server->ops->get_credits(mid); - credits.instance = server->reconnect_instance; + struct cifs_credits credits = { + .value = server->ops->get_credits(mid), + .instance = server->reconnect_instance, + }; add_credits(server, &credits, mid->optype); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 0e04cf8b1d89..5c2845e47cf2 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,8 +133,8 @@ struct ksmbd_transport_ops { }; struct ksmbd_transport { - struct ksmbd_conn *conn; - struct ksmbd_transport_ops *ops; + struct ksmbd_conn *conn; + const struct ksmbd_transport_ops *ops; }; #define KSMBD_TCP_RECV_TIMEOUT (7 * HZ) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index aec0a7a12405..162a12685d2c 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -149,6 +149,7 @@ void ksmbd_session_destroy(struct ksmbd_session *sess) ksmbd_tree_conn_session_logoff(sess); ksmbd_destroy_file_table(&sess->file_table); + ksmbd_launch_ksmbd_durable_scavenger(); ksmbd_session_rpc_clear_list(sess); free_channel_list(sess); kfree(sess->Preauth_HashValue); @@ -326,6 +327,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); up_write(&sessions_table_lock); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index e9da63f25b20..72bc88a63a40 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -11,13 +11,6 @@ #define OPLOCK_WAIT_TIME (35 * HZ) -/* SMB2 Oplock levels */ -#define SMB2_OPLOCK_LEVEL_NONE 0x00 -#define SMB2_OPLOCK_LEVEL_II 0x01 -#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 -#define SMB2_OPLOCK_LEVEL_BATCH 0x09 -#define SMB2_OPLOCK_LEVEL_LEASE 0xFF - /* Oplock states */ #define OPLOCK_STATE_NONE 0x00 #define OPLOCK_ACK_WAIT 0x01 diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c67fbc8d6683..4d24cc105ef6 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -377,6 +377,7 @@ static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl) { ksmbd_ipc_soft_reset(); ksmbd_conn_transport_destroy(); + ksmbd_stop_durable_scavenger(); server_conf_free(); server_conf_init(); WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP); diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h index db7278181760..4fc529335271 100644 --- a/fs/smb/server/server.h +++ b/fs/smb/server/server.h @@ -44,6 +44,7 @@ struct ksmbd_server_config { unsigned int max_connections; char *conf[SERVER_CONF_WORK_GROUP + 1]; + struct task_struct *dh_task; }; extern struct ksmbd_server_config server_conf; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 840c71c66b30..37a39ab4ee65 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3526,7 +3526,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_GUID_SIZE); if (dh_info.timeout) fp->durable_timeout = min(dh_info.timeout, - 300000); + DURABLE_HANDLE_MAX_TIMEOUT); else fp->durable_timeout = 60; } diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 643f5e1cfe35..3be7d5ae65a8 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -72,6 +72,8 @@ struct create_durable_req_v2 { __u8 CreateGuid[16]; } __packed; +#define DURABLE_HANDLE_MAX_TIMEOUT 300000 + struct create_durable_reconn_req { struct create_context_hdr ccontext; __u8 Name[8]; diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8faa25c6e129..cf4418f72772 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -164,7 +164,7 @@ enum { SMB_DIRECT_MSG_DATA_TRANSFER }; -static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; +static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; struct smb_direct_send_ctx { struct list_head msg_list; @@ -2292,7 +2292,7 @@ out: return rdma_capable; } -static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { +static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, .shutdown = smb_direct_shutdown, diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 6633fa78e9b9..a84788396daa 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -37,7 +37,7 @@ struct tcp_transport { unsigned int nr_iov; }; -static struct ksmbd_transport_ops ksmbd_tcp_transport_ops; +static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops; static void tcp_stop_kthread(struct task_struct *kthread); static struct interface *alloc_iface(char *ifname); @@ -649,7 +649,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz) return 0; } -static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { +static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 8b2e37c8716e..4d4ee696e37c 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -8,6 +8,8 @@ #include <linux/filelock.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> +#include <linux/freezer.h> #include "glob.h" #include "vfs_cache.h" @@ -17,6 +19,7 @@ #include "mgmt/tree_connect.h" #include "mgmt/user_session.h" #include "smb_common.h" +#include "server.h" #define S_DEL_PENDING 1 #define S_DEL_ON_CLS 2 @@ -31,6 +34,10 @@ static struct ksmbd_file_table global_ft; static atomic_long_t fd_limit; static struct kmem_cache *filp_cache; +static bool durable_scavenger_running; +static DEFINE_MUTEX(durable_scavenger_lock); +static wait_queue_head_t dh_wq; + void ksmbd_set_fd_limit(unsigned long limit) { limit = min(limit, get_max_files()); @@ -280,9 +287,16 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) if (!has_file_id(fp->persistent_id)) return; - write_lock(&global_ft.lock); idr_remove(global_ft.idr, fp->persistent_id); +} + +static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) +{ + write_lock(&global_ft.lock); + __ksmbd_remove_durable_fd(fp); write_unlock(&global_ft.lock); + if (waitqueue_active(&dh_wq)) + wake_up(&dh_wq); } static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) @@ -305,7 +319,7 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) struct ksmbd_lock *smb_lock, *tmp_lock; fd_limit_close(); - __ksmbd_remove_durable_fd(fp); + ksmbd_remove_durable_fd(fp); if (ft) __ksmbd_remove_fd(ft, fp); @@ -477,7 +491,10 @@ struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id) struct ksmbd_file *fp; fp = __ksmbd_lookup_fd(&global_ft, id); - if (fp && fp->conn) { + if (fp && (fp->conn || + (fp->durable_scavenger_timeout && + (fp->durable_scavenger_timeout < + jiffies_to_msecs(jiffies))))) { ksmbd_put_durable_fd(fp); fp = NULL; } @@ -694,6 +711,142 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon, return fp->tcon != tcon; } +static bool ksmbd_durable_scavenger_alive(void) +{ + mutex_lock(&durable_scavenger_lock); + if (!durable_scavenger_running) { + mutex_unlock(&durable_scavenger_lock); + return false; + } + mutex_unlock(&durable_scavenger_lock); + + if (kthread_should_stop()) + return false; + + if (idr_is_empty(global_ft.idr)) + return false; + + return true; +} + +static void ksmbd_scavenger_dispose_dh(struct list_head *head) +{ + while (!list_empty(head)) { + struct ksmbd_file *fp; + + fp = list_first_entry(head, struct ksmbd_file, node); + list_del_init(&fp->node); + __ksmbd_close_fd(NULL, fp); + } +} + +static int ksmbd_durable_scavenger(void *dummy) +{ + struct ksmbd_file *fp = NULL; + unsigned int id; + unsigned int min_timeout = 1; + bool found_fp_timeout; + LIST_HEAD(scavenger_list); + unsigned long remaining_jiffies; + + __module_get(THIS_MODULE); + + set_freezable(); + while (ksmbd_durable_scavenger_alive()) { + if (try_to_freeze()) + continue; + + found_fp_timeout = false; + + remaining_jiffies = wait_event_timeout(dh_wq, + ksmbd_durable_scavenger_alive() == false, + __msecs_to_jiffies(min_timeout)); + if (remaining_jiffies) + min_timeout = jiffies_to_msecs(remaining_jiffies); + else + min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; + + write_lock(&global_ft.lock); + idr_for_each_entry(global_ft.idr, fp, id) { + if (!fp->durable_timeout) + continue; + + if (atomic_read(&fp->refcount) > 1 || + fp->conn) + continue; + + found_fp_timeout = true; + if (fp->durable_scavenger_timeout <= + jiffies_to_msecs(jiffies)) { + __ksmbd_remove_durable_fd(fp); + list_add(&fp->node, &scavenger_list); + } else { + unsigned long durable_timeout; + + durable_timeout = + fp->durable_scavenger_timeout - + jiffies_to_msecs(jiffies); + + if (min_timeout > durable_timeout) + min_timeout = durable_timeout; + } + } + write_unlock(&global_ft.lock); + + ksmbd_scavenger_dispose_dh(&scavenger_list); + + if (found_fp_timeout == false) + break; + } + + mutex_lock(&durable_scavenger_lock); + durable_scavenger_running = false; + mutex_unlock(&durable_scavenger_lock); + + module_put(THIS_MODULE); + + return 0; +} + +void ksmbd_launch_ksmbd_durable_scavenger(void) +{ + if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)) + return; + + mutex_lock(&durable_scavenger_lock); + if (durable_scavenger_running == true) { + mutex_unlock(&durable_scavenger_lock); + return; + } + + durable_scavenger_running = true; + + server_conf.dh_task = kthread_run(ksmbd_durable_scavenger, + (void *)NULL, "ksmbd-durable-scavenger"); + if (IS_ERR(server_conf.dh_task)) + pr_err("cannot start conn thread, err : %ld\n", + PTR_ERR(server_conf.dh_task)); + mutex_unlock(&durable_scavenger_lock); +} + +void ksmbd_stop_durable_scavenger(void) +{ + if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)) + return; + + mutex_lock(&durable_scavenger_lock); + if (!durable_scavenger_running) { + mutex_unlock(&durable_scavenger_lock); + return; + } + + durable_scavenger_running = false; + if (waitqueue_active(&dh_wq)) + wake_up(&dh_wq); + mutex_unlock(&durable_scavenger_lock); + kthread_stop(server_conf.dh_task); +} + static bool session_fd_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp) { @@ -718,6 +871,10 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, fp->tcon = NULL; fp->volatile_id = KSMBD_NO_FID; + if (fp->durable_timeout) + fp->durable_scavenger_timeout = + jiffies_to_msecs(jiffies) + fp->durable_timeout; + return true; } @@ -750,11 +907,12 @@ void ksmbd_free_global_file_table(void) unsigned int id; idr_for_each_entry(global_ft.idr, fp, id) { - __ksmbd_remove_durable_fd(fp); - kmem_cache_free(filp_cache, fp); + ksmbd_remove_durable_fd(fp); + __ksmbd_close_fd(NULL, fp); } - ksmbd_destroy_file_table(&global_ft); + idr_destroy(global_ft.idr); + kfree(global_ft.idr); } int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share, @@ -810,6 +968,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) } up_write(&ci->m_lock); + fp->f_state = FP_NEW; __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { fp->conn = NULL; @@ -849,6 +1008,8 @@ int ksmbd_init_file_cache(void) if (!filp_cache) goto out; + init_waitqueue_head(&dh_wq); + return 0; out: diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5a225e7055f1..b0f6d0f94cb8 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -101,6 +101,7 @@ struct ksmbd_file { struct list_head lock_list; int durable_timeout; + int durable_scavenger_timeout; /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; @@ -152,6 +153,8 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp); struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp); +void ksmbd_launch_ksmbd_durable_scavenger(void); +void ksmbd_stop_durable_scavenger(void); void ksmbd_close_tree_conn_fds(struct ksmbd_work *work); void ksmbd_close_session_fds(struct ksmbd_work *work); int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 44666afc6209..bc625788589c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1540,4 +1540,5 @@ static void __exit exit_ufs_fs(void) module_init(init_ufs_fs) module_exit(exit_ufs_fs) +MODULE_DESCRIPTION("UFS Filesystem"); MODULE_LICENSE("GPL"); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 17e409ceaa33..27a3e9285fbf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -257,7 +257,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, goto out; ret = false; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep); /* * Lockless access: we're in a wait_event so it's ok if it |