Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/dir.c                       |    6
-rw-r--r--  fs/afs/dir_edit.c                  |    4
-rw-r--r--  fs/aio.c                           |   15
-rw-r--r--  fs/bcachefs/alloc_foreground.c     |    6
-rw-r--r--  fs/bcachefs/clock.c                |   50
-rw-r--r--  fs/bcachefs/clock_types.h          |    2
-rw-r--r--  fs/bcachefs/ec.c                   |   76
-rw-r--r--  fs/bcachefs/ec_types.h             |    2
-rw-r--r--  fs/bcachefs/fsck.c                 |    7
-rw-r--r--  fs/bcachefs/journal_reclaim.c      |    1
-rw-r--r--  fs/bcachefs/mean_and_variance.h    |    6
-rw-r--r--  fs/bcachefs/util.c                 |    2
-rw-r--r--  fs/bcachefs/util.h                 |  118
-rw-r--r--  fs/btrfs/relocation.c              |    3
-rw-r--r--  fs/btrfs/send.c                    |    2
-rw-r--r--  fs/ceph/dir.c                      |    2
-rw-r--r--  fs/ceph/inode.c                    |    2
-rw-r--r--  fs/coredump.c                      |   14
-rw-r--r--  fs/hugetlbfs/inode.c               |   20
-rw-r--r--  fs/netfs/buffered_read.c           |    4
-rw-r--r--  fs/netfs/buffered_write.c          |    2
-rw-r--r--  fs/nfs/file.c                      |    2
-rw-r--r--  fs/nfs/iostat.h                    |    4
-rw-r--r--  fs/nfs/write.c                     |    4
-rw-r--r--  fs/nilfs2/bmap.c                   |   10
-rw-r--r--  fs/nilfs2/segment.c                |   91
-rw-r--r--  fs/nilfs2/sysfs.c                  |    6
-rw-r--r--  fs/ntfs3/attrib.c                  |  132
-rw-r--r--  fs/ntfs3/bitmap.c                  |    2
-rw-r--r--  fs/ntfs3/dir.c                     |   57
-rw-r--r--  fs/ntfs3/file.c                    |  124
-rw-r--r--  fs/ntfs3/frecord.c                 |  110
-rw-r--r--  fs/ntfs3/fslog.c                   |   77
-rw-r--r--  fs/ntfs3/fsntfs.c                  |   11
-rw-r--r--  fs/ntfs3/index.c                   |    4
-rw-r--r--  fs/ntfs3/inode.c                   |  119
-rw-r--r--  fs/ntfs3/namei.c                   |    6
-rw-r--r--  fs/ntfs3/ntfs.h                    |   15
-rw-r--r--  fs/ntfs3/ntfs_fs.h                 |   36
-rw-r--r--  fs/ntfs3/super.c                   |   71
-rw-r--r--  fs/ntfs3/xattr.c                   |   25
-rw-r--r--  fs/ocfs2/dir.c                     |   46
-rw-r--r--  fs/ocfs2/dlmglue.c                 |   28
-rw-r--r--  fs/ocfs2/namei.c                   |    2
-rw-r--r--  fs/ocfs2/ocfs2.h                   |    2
-rw-r--r--  fs/ocfs2/stack_o2cb.c              |    2
-rw-r--r--  fs/ocfs2/stack_user.c              |    2
-rw-r--r--  fs/ocfs2/stackglue.h               |    2
-rw-r--r--  fs/ocfs2/xattr.c                   |   27
-rw-r--r--  fs/proc/internal.h                 |   33
-rw-r--r--  fs/proc/page.c                     |   42
-rw-r--r--  fs/proc/task_mmu.c                 |  503
-rw-r--r--  fs/smb/client/cifsfs.c             |    2
-rw-r--r--  fs/smb/client/cifsglob.h           |   17
-rw-r--r--  fs/smb/client/file.c               |   53
-rw-r--r--  fs/smb/client/smb1ops.c            |    2
-rw-r--r--  fs/smb/client/smb2ops.c            |   42
-rw-r--r--  fs/smb/client/smb2pdu.c            |   43
-rw-r--r--  fs/smb/client/trace.h              |   55
-rw-r--r--  fs/smb/client/transport.c          |    8
-rw-r--r--  fs/smb/server/connection.h         |    4
-rw-r--r--  fs/smb/server/mgmt/user_session.c  |    2
-rw-r--r--  fs/smb/server/oplock.h             |    7
-rw-r--r--  fs/smb/server/server.c             |    1
-rw-r--r--  fs/smb/server/server.h             |    1
-rw-r--r--  fs/smb/server/smb2pdu.c            |    2
-rw-r--r--  fs/smb/server/smb2pdu.h            |    2
-rw-r--r--  fs/smb/server/transport_rdma.c     |    4
-rw-r--r--  fs/smb/server/transport_tcp.c      |    4
-rw-r--r--  fs/smb/server/vfs_cache.c          |  173
-rw-r--r--  fs/smb/server/vfs_cache.h          |    3
-rw-r--r--  fs/ufs/super.c                     |    1
-rw-r--r--  fs/userfaultfd.c                   |    2
73 files changed, 1574 insertions(+), 793 deletions(-)
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 67afe68972d5..f8622ed72e08 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -533,14 +533,14 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
break;
}
- offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
+ offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio);
size = min_t(loff_t, folio_size(folio),
- req->actual_len - folio_file_pos(folio));
+ req->actual_len - folio_pos(folio));
do {
dblock = kmap_local_folio(folio, offset);
ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- folio_file_pos(folio) + offset);
+ folio_pos(folio) + offset);
kunmap_local(dblock);
if (ret != 1)
goto out;
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index e2fa577b66fe..a71bff10496b 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -256,7 +256,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
folio = folio0;
}
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -417,7 +417,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
folio = folio0;
}
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
diff --git a/fs/aio.c b/fs/aio.c
index 93ef59d358b3..6066f64967b3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -410,17 +410,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
struct kioctx *ctx;
unsigned long flags;
pgoff_t idx;
- int rc;
-
- /*
- * We cannot support the _NO_COPY case here, because copy needs to
- * happen under the ctx->completion_lock. That does not work with the
- * migration workflow of MIGRATE_SYNC_NO_COPY.
- */
- if (mode == MIGRATE_SYNC_NO_COPY)
- return -EINVAL;
-
- rc = 0;
+ int rc = 0;
/* mapping->i_private_lock here protects against the kioctx teardown. */
spin_lock(&mapping->i_private_lock);
@@ -465,7 +455,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
* events from being lost.
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
- folio_migrate_copy(dst, src);
+ folio_copy(dst, src);
+ folio_migrate_flags(dst, src);
BUG_ON(ctx->ring_folios[idx] != src);
ctx->ring_folios[idx] = dst;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
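Note on the aio.c hunk: with MIGRATE_SYNC_NO_COPY gone from the tree, the special-case rejection is dropped, and folio_migrate_copy() is open-coded as folio_copy() plus folio_migrate_flags() so both the data copy and the flag transfer happen under ctx->completion_lock. A minimal sketch, assuming folio_migrate_copy() is simply the composition of the two helpers (which matches the mm code this series touches):

    /* Sketch of what the two-call sequence above accomplishes. */
    static void migrate_copy_sketch(struct folio *dst, struct folio *src)
    {
        folio_copy(dst, src);          /* copy page contents */
        folio_migrate_flags(dst, src); /* transfer uptodate/dirty/cgroup state */
    }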
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index cabf866c7956..618d2ff0292e 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -496,12 +496,6 @@ again:
for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
alloc_cursor < k.k->p.offset;
alloc_cursor++) {
- ret = btree_trans_too_many_iters(trans);
- if (ret) {
- ob = ERR_PTR(ret);
- break;
- }
-
s->buckets_seen++;
u64 bucket = alloc_cursor & ~(~0ULL << 56);
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index df3763c18c0e..1d6b691e8da6 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -6,15 +6,29 @@
#include <linux/kthread.h>
#include <linux/preempt.h>
-static inline long io_timer_cmp(io_timer_heap *h,
- struct io_timer *l,
- struct io_timer *r)
+static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args)
{
- return l->expire - r->expire;
+ struct io_timer **_l = (struct io_timer **)l;
+ struct io_timer **_r = (struct io_timer **)r;
+
+ return (*_l)->expire < (*_r)->expire;
+}
+
+static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
+{
+ struct io_timer **_l = (struct io_timer **)l;
+ struct io_timer **_r = (struct io_timer **)r;
+
+ swap(*_l, *_r);
}
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
spin_lock(&clock->timer_lock);
if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
@@ -23,22 +37,27 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
return;
}
- for (size_t i = 0; i < clock->timers.used; i++)
+ for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer)
goto out;
- BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
+ BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL));
out:
spin_unlock(&clock->timer_lock);
}
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
spin_lock(&clock->timer_lock);
- for (size_t i = 0; i < clock->timers.used; i++)
+ for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer) {
- heap_del(&clock->timers, i, io_timer_cmp, NULL);
+ min_heap_del(&clock->timers, i, &callbacks, NULL);
break;
}
@@ -123,10 +142,17 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
{
struct io_timer *ret = NULL;
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
+ if (clock->timers.nr &&
+ time_after_eq64(now, clock->timers.data[0]->expire)) {
+ ret = *min_heap_peek(&clock->timers);
+ min_heap_pop(&clock->timers, &callbacks, NULL);
+ }
- if (clock->timers.used &&
- time_after_eq64(now, clock->timers.data[0]->expire))
- heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
return ret;
}
@@ -150,7 +176,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
printbuf_tabstop_push(out, 40);
prt_printf(out, "current time:\t%llu\n", now);
- for (unsigned i = 0; i < clock->timers.used; i++)
+ for (unsigned i = 0; i < clock->timers.nr; i++)
prt_printf(out, "%ps %ps:\t%llu\n",
clock->timers.data[i]->fn,
clock->timers.data[i]->fn2,
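The clock.c conversion is part of a series replacing bcachefs's private HEAP() macros with the generic <linux/min_heap.h> API: the element count moves from .used to .nr, and comparison/swap become a min_heap_callbacks pair operating on void pointers into the backing array. A self-contained sketch of the same pattern for a heap of u64 values (all names here are illustrative, not from the patch):

    typedef DEFINE_MIN_HEAP(u64, u64_heap) u64_heap;

    static bool u64_less(const void *l, const void *r, void __always_unused *args)
    {
        return *(const u64 *)l < *(const u64 *)r;
    }

    static void u64_swp(void *l, void *r, void __always_unused *args)
    {
        swap(*(u64 *)l, *(u64 *)r);
    }

    /* Pop the smallest element, mirroring get_expired_timer() above. */
    static bool u64_pop_min(u64_heap *heap, u64 *out)
    {
        const struct min_heap_callbacks cb = {
            .less = u64_less,
            .swp = u64_swp,
        };

        if (!heap->nr)
            return false;
        *out = *min_heap_peek(heap);    /* pointer to data[0] */
        min_heap_pop(heap, &cb, NULL);
        return true;
    }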
diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h
index 9c25d0fcf294..37554e4514fe 100644
--- a/fs/bcachefs/clock_types.h
+++ b/fs/bcachefs/clock_types.h
@@ -24,7 +24,7 @@ struct io_timer {
/* Amount to buffer up on a percpu counter */
#define IO_CLOCK_PCPU_SECTORS 128
-typedef HEAP(struct io_timer *) io_timer_heap;
+typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap;
struct io_clock {
atomic64_t now;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 86948d110f6b..9b5b5c9a6c63 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -901,8 +901,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
mutex_lock(&c->ec_stripes_heap_lock);
if (n.size > h->size) {
- memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
- n.used = h->used;
+ memcpy(n.data, h->data, h->nr * sizeof(h->data[0]));
+ n.nr = h->nr;
swap(*h, n);
}
mutex_unlock(&c->ec_stripes_heap_lock);
@@ -993,7 +993,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
lockdep_assert_held(&c->ec_stripes_heap_lock);
- if (h->used &&
+ if (h->nr &&
h->data[0].blocks_nonempty == 0 &&
!bch2_stripe_is_open(c, h->data[0].idx))
return h->data[0].idx;
@@ -1001,14 +1001,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
return 0;
}
-static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
- struct ec_stripe_heap_entry l,
- struct ec_stripe_heap_entry r)
-{
- return ((l.blocks_nonempty > r.blocks_nonempty) -
- (l.blocks_nonempty < r.blocks_nonempty));
-}
-
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
size_t i)
{
@@ -1017,39 +1009,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
}
+static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
+{
+ struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
+ struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
+
+ return ((_l->blocks_nonempty > _r->blocks_nonempty) <
+ (_l->blocks_nonempty < _r->blocks_nonempty));
+}
+
+static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
+{
+ struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
+ struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
+ ec_stripes_heap *_h = (ec_stripes_heap *)h;
+ size_t i = _l - _h->data;
+ size_t j = _r - _h->data;
+
+ swap(*_l, *_r);
+
+ ec_stripes_heap_set_backpointer(_h, i);
+ ec_stripes_heap_set_backpointer(_h, j);
+}
+
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m = genradix_ptr(&c->stripes, idx);
- BUG_ON(m->heap_idx >= h->used);
+ BUG_ON(m->heap_idx >= h->nr);
BUG_ON(h->data[m->heap_idx].idx != idx);
}
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
+
mutex_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);
- heap_del(&c->ec_stripes_heap, m->heap_idx,
- ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap);
mutex_unlock(&c->ec_stripes_heap_lock);
}
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
+
mutex_lock(&c->ec_stripes_heap_lock);
- BUG_ON(heap_full(&c->ec_stripes_heap));
+ BUG_ON(min_heap_full(&c->ec_stripes_heap));
- heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
+ genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr;
+ min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = m->blocks_nonempty,
}),
- ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ &callbacks,
+ &c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
mutex_unlock(&c->ec_stripes_heap_lock);
@@ -1058,6 +1082,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
ec_stripes_heap *h = &c->ec_stripes_heap;
bool do_deletes;
size_t i;
@@ -1068,10 +1096,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
- heap_sift_up(h, i, ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
- heap_sift_down(h, i, ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap);
+ min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
@@ -1864,7 +1890,7 @@ static s64 get_existing_stripe(struct bch_fs *c,
return -1;
mutex_lock(&c->ec_stripes_heap_lock);
- for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
+ for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
/* No blocks worth reusing, stripe will just be deleted: */
if (!h->data[heap_idx].blocks_nonempty)
continue;
@@ -2195,7 +2221,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
size_t i;
mutex_lock(&c->ec_stripes_heap_lock);
- for (i = 0; i < min_t(size_t, h->used, 50); i++) {
+ for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
m = genradix_ptr(&c->stripes, h->data[i].idx);
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h
index 976426da3a12..1df03dccfc72 100644
--- a/fs/bcachefs/ec_types.h
+++ b/fs/bcachefs/ec_types.h
@@ -36,6 +36,6 @@ struct ec_stripe_heap_entry {
unsigned blocks_nonempty;
};
-typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
+typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap;
#endif /* _BCACHEFS_EC_TYPES_H */
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index cc4f0963c0c5..9138944c5ae6 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -283,6 +283,7 @@ static int reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 inode_snapshot)
{
+ struct bch_fs *c = trans->c;
struct bch_hash_info dir_hash;
struct bch_inode_unpacked lostfound;
char name_buf[20];
@@ -317,7 +318,7 @@ static int reattach_inode(struct btree_trans *trans,
return ret;
}
- dir_hash = bch2_hash_info_init(trans->c, &lostfound);
+ dir_hash = bch2_hash_info_init(c, &lostfound);
name = (struct qstr) QSTR(name_buf);
@@ -330,8 +331,10 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_subvol ?: inode->bi_inum,
&dir_offset,
STR_HASH_must_create);
- if (ret)
+ if (ret) {
+ bch_err_msg(c, ret, "error creating dirent");
return ret;
+ }
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index d8a630742887..70b998d9f19c 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -206,6 +206,7 @@ void bch2_journal_space_available(struct journal *j)
if (nr_online < metadata_replicas_required(c)) {
struct printbuf buf = PRINTBUF;
+ buf.atomic++;
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h
index 4fcf062dd22c..47e4a3c3d26e 100644
--- a/fs/bcachefs/mean_and_variance.h
+++ b/fs/bcachefs/mean_and_variance.h
@@ -111,11 +111,11 @@ static inline u128_u u128_shl(u128_u i, s8 shift)
{
u128_u r;
- r.lo = i.lo << shift;
+ r.lo = i.lo << (shift & 63);
if (shift < 64)
- r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
+ r.hi = (i.hi << (shift & 63)) | (i.lo >> (-shift & 63));
else {
- r.hi = i.lo << (shift - 64);
+ r.hi = i.lo << (shift & 63);
r.lo = 0;
}
return r;
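Context for the mean_and_variance.h hunk: shifting a u64 by 64 or more positions is undefined behavior in C, which the old code hit for shift == 0 (i.lo >> 64) and shift >= 64 (i.hi << shift). Masking every shift count with "& 63" keeps each individual shift in 0..63, using the identities 64 - s == (-s) & 63 for 1 <= s <= 63 and s - 64 == s & 63 for 64 <= s < 128 (note the else branch above is corrected accordingly). A hedged userspace illustration of the same trick:

    #include <stdint.h>

    /* 128-bit left shift built from 64-bit halves, with every single
     * shift count masked into 0..63 (mirrors the fixed u128_shl()). */
    static void u128_shl_sketch(uint64_t *hi, uint64_t *lo, unsigned s)
    {
        if (s < 64) {
            /* guard s == 0, where (-s & 63) is 0, not 64 */
            *hi = (*hi << (s & 63)) | (s ? *lo >> (-s & 63) : 0);
            *lo <<= (s & 63);
        } else {                   /* 64 <= s < 128 */
            *hi = *lo << (s & 63); /* same as s - 64 here */
            *lo = 0;
        }
    }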
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 4ec7e44d6e36..138320eaa2ad 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * random utiility code, for bcache but in theory not specific to bcache
+ * random utility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2def4f761ca6..902b7f5406a2 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -8,6 +8,7 @@
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kernel.h>
+#include <linux/min_heap.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
#include <linux/log2.h>
@@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
-#define HEAP(type) \
-struct { \
- size_t size, used; \
- type *data; \
-}
-
-#define DECLARE_HEAP(type, name) HEAP(type) name
-
#define init_heap(heap, _size, gfp) \
({ \
- (heap)->used = 0; \
+ (heap)->nr = 0; \
(heap)->size = (_size); \
(heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\
(gfp)); \
@@ -76,113 +69,6 @@ do { \
(heap)->data = NULL; \
} while (0)
-#define heap_set_backpointer(h, i, _fn) \
-do { \
- void (*fn)(typeof(h), size_t) = _fn; \
- if (fn) \
- fn(h, i); \
-} while (0)
-
-#define heap_swap(h, i, j, set_backpointer) \
-do { \
- swap((h)->data[i], (h)->data[j]); \
- heap_set_backpointer(h, i, set_backpointer); \
- heap_set_backpointer(h, j, set_backpointer); \
-} while (0)
-
-#define heap_peek(h) \
-({ \
- EBUG_ON(!(h)->used); \
- (h)->data[0]; \
-})
-
-#define heap_full(h) ((h)->used == (h)->size)
-
-#define heap_sift_down(h, i, cmp, set_backpointer) \
-do { \
- size_t _c, _j = i; \
- \
- for (; _j * 2 + 1 < (h)->used; _j = _c) { \
- _c = _j * 2 + 1; \
- if (_c + 1 < (h)->used && \
- cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
- _c++; \
- \
- if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
- break; \
- heap_swap(h, _c, _j, set_backpointer); \
- } \
-} while (0)
-
-#define heap_sift_up(h, i, cmp, set_backpointer) \
-do { \
- while (i) { \
- size_t p = (i - 1) / 2; \
- if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
- break; \
- heap_swap(h, i, p, set_backpointer); \
- i = p; \
- } \
-} while (0)
-
-#define __heap_add(h, d, cmp, set_backpointer) \
-({ \
- size_t _i = (h)->used++; \
- (h)->data[_i] = d; \
- heap_set_backpointer(h, _i, set_backpointer); \
- \
- heap_sift_up(h, _i, cmp, set_backpointer); \
- _i; \
-})
-
-#define heap_add(h, d, cmp, set_backpointer) \
-({ \
- bool _r = !heap_full(h); \
- if (_r) \
- __heap_add(h, d, cmp, set_backpointer); \
- _r; \
-})
-
-#define heap_add_or_replace(h, new, cmp, set_backpointer) \
-do { \
- if (!heap_add(h, new, cmp, set_backpointer) && \
- cmp(h, new, heap_peek(h)) >= 0) { \
- (h)->data[0] = new; \
- heap_set_backpointer(h, 0, set_backpointer); \
- heap_sift_down(h, 0, cmp, set_backpointer); \
- } \
-} while (0)
-
-#define heap_del(h, i, cmp, set_backpointer) \
-do { \
- size_t _i = (i); \
- \
- BUG_ON(_i >= (h)->used); \
- (h)->used--; \
- if ((_i) < (h)->used) { \
- heap_swap(h, _i, (h)->used, set_backpointer); \
- heap_sift_up(h, _i, cmp, set_backpointer); \
- heap_sift_down(h, _i, cmp, set_backpointer); \
- } \
-} while (0)
-
-#define heap_pop(h, d, cmp, set_backpointer) \
-({ \
- bool _r = (h)->used; \
- if (_r) { \
- (d) = (h)->data[0]; \
- heap_del(h, 0, cmp, set_backpointer); \
- } \
- _r; \
-})
-
-#define heap_resort(heap, cmp, set_backpointer) \
-do { \
- ssize_t _i; \
- for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
- heap_sift_down(heap, _i, cmp, set_backpointer); \
-} while (0)
-
#define ANYSINT_MAX(t) \
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b592fc8cf368..0533d0f82dc9 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2981,8 +2981,7 @@ static int relocate_one_folio(struct reloc_control *rc,
if (folio_test_readahead(folio))
page_cache_async_readahead(inode->i_mapping, ra, NULL,
- folio, index,
- last_index + 1 - index);
+ folio, last_index + 1 - index);
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fb3675f5bf50..4ca711a773ef 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5306,7 +5306,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
if (folio_test_readahead(folio))
page_cache_async_readahead(mapping, &sctx->ra, NULL, folio,
- index, last_index + 1 - index);
+ last_index + 1 - index);
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
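Both btrfs call sites drop the explicit index argument because page_cache_async_readahead() now takes the starting index from folio->index. The updated prototype, as assumed by the hunks above:

    void page_cache_async_readahead(struct address_space *mapping,
                                    struct file_ra_state *ra,
                                    struct file *file, struct folio *folio,
                                    unsigned long req_count);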
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 82a2e2a06a65..5aadc56e0cc0 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
if (ptr_pos >= i_size_read(dir))
return NULL;
- if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
+ if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) {
ceph_readdir_cache_release(cache_ctl);
cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
if (!cache_ctl->page) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 249ddfbb1b03..8f8de8f33abb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1863,7 +1863,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
unsigned idx = ctl->index % nsize;
pgoff_t pgoff = ctl->index / nsize;
- if (!ctl->page || pgoff != page_index(ctl->page)) {
+ if (!ctl->page || pgoff != ctl->page->index) {
ceph_readdir_cache_release(ctl);
if (idx == 0)
ctl->page = grab_cache_page(&dir->i_data, pgoff);
diff --git a/fs/coredump.c b/fs/coredump.c
index a57a06b80f57..4dc5140bac3f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -361,17 +361,16 @@ out:
return ispipe;
}
-static int zap_process(struct task_struct *start, int exit_code)
+static int zap_process(struct signal_struct *signal, int exit_code)
{
struct task_struct *t;
int nr = 0;
- /* Allow SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
+ signal->flags = SIGNAL_GROUP_EXIT;
+ signal->group_exit_code = exit_code;
+ signal->group_stop_count = 0;
- for_each_thread(start, t) {
+ __for_each_thread(signal, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
@@ -391,8 +390,9 @@ static int zap_threads(struct task_struct *tsk,
spin_lock_irq(&tsk->sighand->siglock);
if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+ /* Allow SIGKILL, see prepare_signal() */
signal->core_state = core_state;
- nr = zap_process(tsk, exit_code);
+ nr = zap_process(signal, exit_code);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
atomic_set(&core_state->nr_threads, nr);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 81dab95f67ed..9f6cff356796 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -222,13 +222,13 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct hstate *h = hstate_file(file);
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > TASK_SIZE)
+ if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED) {
@@ -239,9 +239,10 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (mmap_end - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ vma = find_vma_prev(mm, addr, &prev);
+ if (mmap_end - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
@@ -422,7 +423,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
if (!ptep)
return false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, addr, ptep);
if (huge_pte_none(pte) || !pte_present(pte))
return false;
@@ -892,7 +893,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
error = PTR_ERR(folio);
goto out;
}
- clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+ folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size));
__folio_mark_uptodate(folio);
error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
@@ -1128,10 +1129,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
hugetlb_set_folio_subpool(src, NULL);
}
- if (mode != MIGRATE_SYNC_NO_COPY)
- folio_migrate_copy(dst, src);
- else
- folio_migrate_flags(dst, src);
+ folio_migrate_flags(dst, src);
return MIGRATEPAGE_SUCCESS;
}
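The hugetlbfs hint-address path now mirrors generic_get_unmapped_area(): find_vma_prev() looks up both neighbors, and the hint is honored only if the whole [addr, addr + len) range lies above mmap_min_addr and fits between the previous VMA's guard gap and the next VMA's guard gap. Condensed shape of the check, annotated, from the hunk above:

    addr = ALIGN(addr, huge_page_size(h));
    vma = find_vma_prev(mm, addr, &prev);
    if (mmap_end - len >= addr && addr >= mmap_min_addr &&
        (!vma || addr + len <= vm_start_gap(vma)) &&  /* below next VMA */
        (!prev || addr >= vm_end_gap(prev)))          /* above previous VMA */
        return addr;
    /* otherwise fall through to a fresh unmapped-area search */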
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 4c0401dbbfcf..a6d5d07cd436 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -271,7 +271,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
kenter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
- folio_file_pos(folio), folio_size(folio),
+ folio_pos(folio), folio_size(folio),
NETFS_READPAGE);
if (IS_ERR(rreq)) {
ret = PTR_ERR(rreq);
@@ -470,7 +470,7 @@ retry:
}
rreq = netfs_alloc_request(mapping, file,
- folio_file_pos(folio), folio_size(folio),
+ folio_pos(folio), folio_size(folio),
NETFS_READ_FOR_WRITE);
if (IS_ERR(rreq)) {
ret = PTR_ERR(rreq);
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index ecbc99ec7d36..68a3f1383cee 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -54,7 +54,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
{
struct netfs_folio *finfo = netfs_folio_info(folio);
struct netfs_group *group = netfs_folio_group(folio);
- loff_t pos = folio_file_pos(folio);
+ loff_t pos = folio_pos(folio);
kenter("");
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9aa2ab218c0a..61a8cdb9f1e1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -591,7 +591,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
- (long long)folio_file_pos(folio));
+ (long long)folio_pos(folio));
sb_start_pagefault(inode->i_sb);
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index b17a9eb9b148..49862c95b224 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -46,6 +46,10 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 190c1fa8882c..d074d0ceb4f0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -182,7 +182,7 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset,
end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
if (i_size > 0 && folio->index < end_index)
goto out;
- end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
+ end = folio_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -1344,7 +1344,7 @@ int nfs_update_folio(struct file *file, struct folio *folio,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
- (long long)(folio_file_pos(folio) + offset));
+ (long long)(folio_pos(folio) + offset));
if (!count)
goto out;
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 383f0afa2cea..cd14ea25968c 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -450,15 +450,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
- struct buffer_head *pbh;
- __u64 key;
+ loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh);
- key = page_index(bh->b_page) << (PAGE_SHIFT -
- bmap->b_inode->i_blkbits);
- for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
- key++;
-
- return key;
+ return pos >> bmap->b_inode->i_blkbits;
}
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
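The bmap.c hunk replaces a walk over the folio's buffer list with plain arithmetic: the byte position of a buffer head is folio_pos(folio) + bh_offset(bh), and shifting by i_blkbits yields the block key directly. A worked example, assuming 4 KiB folios and 1 KiB blocks (i_blkbits = 10):

    /* Third buffer (bh_offset = 2048) in the folio covering bytes
     * 8192..12287 (folio_pos = 8192):
     *
     *     pos = 8192 + 2048 = 10240
     *     key = 10240 >> 10 = 10   (the file's eleventh 1 KiB block)
     */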
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6ea81f1d5094..0ca3110d6386 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
#define nilfs_cnt32_ge(a, b) \
(typecheck(__u32, a) && typecheck(__u32, b) && \
- ((__s32)(a) - (__s32)(b) >= 0))
+ ((__s32)((a) - (b)) >= 0))
static int nilfs_prepare_segment_lock(struct super_block *sb,
struct nilfs_transaction_info *ti)
@@ -1639,41 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio)
folio_unlock(folio);
}
-static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
+/**
+ * nilfs_prepare_write_logs - prepare to write logs
+ * @logs: logs to prepare for writing
+ * @seed: checksum seed value
+ *
+ * nilfs_prepare_write_logs() adds checksums and prepares the block
+ * buffers/folios for writing logs. In order to stabilize folios of
+ * memory-mapped file blocks by putting them in writeback state before
+ * calculating the checksums, first prepare to write payload blocks other
+ * than segment summary and super root blocks in which the checksums will
+ * be embedded.
+ */
+static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed)
{
struct nilfs_segment_buffer *segbuf;
struct folio *bd_folio = NULL, *fs_folio = NULL;
+ struct buffer_head *bh;
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- struct buffer_head *bh;
-
- list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
- b_assoc_buffers) {
- if (bh->b_folio != bd_folio) {
- if (bd_folio) {
- folio_lock(bd_folio);
- folio_wait_writeback(bd_folio);
- folio_clear_dirty_for_io(bd_folio);
- folio_start_writeback(bd_folio);
- folio_unlock(bd_folio);
- }
- bd_folio = bh->b_folio;
- }
- }
-
+ /* Prepare to write payload blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- if (bh == segbuf->sb_super_root) {
- if (bh->b_folio != bd_folio) {
- folio_lock(bd_folio);
- folio_wait_writeback(bd_folio);
- folio_clear_dirty_for_io(bd_folio);
- folio_start_writeback(bd_folio);
- folio_unlock(bd_folio);
- bd_folio = bh->b_folio;
- }
+ if (bh == segbuf->sb_super_root)
break;
- }
set_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_begin_folio_io(fs_folio);
@@ -1681,6 +1670,42 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
}
}
+ nilfs_begin_folio_io(fs_folio);
+
+ nilfs_add_checksums_on_logs(logs, seed);
+
+ /* Prepare to write segment summary blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ b_assoc_buffers) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio == bd_folio)
+ continue;
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ }
+ bd_folio = bh->b_folio;
+ }
+ }
+
+ /* Prepare to write super root block */
+ bh = NILFS_LAST_SEGBUF(logs)->sb_super_root;
+ if (bh) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ bd_folio = bh->b_folio;
+ }
+ }
+
if (bd_folio) {
folio_lock(bd_folio);
folio_wait_writeback(bd_folio);
@@ -1688,7 +1713,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
folio_start_writeback(bd_folio);
folio_unlock(bd_folio);
}
- nilfs_begin_folio_io(fs_folio);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -2070,10 +2094,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- nilfs_segctor_prepare_write(sci);
-
- nilfs_add_checksums_on_logs(&sci->sc_segbufs,
- nilfs->ns_crc_seed);
+ nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed);
err = nilfs_segctor_write(sci, nilfs);
if (unlikely(err))
@@ -2824,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
- inode_attach_wb(nilfs->ns_bdev->bd_mapping->host, NULL);
-
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
nilfs_detach_log_writer(sb);
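The nilfs_cnt32_ge() fix uses the sequence-number idiom also behind the kernel's time_after(): subtract in unsigned arithmetic, where wraparound is well defined, and only then test the sign of the difference, instead of casting each operand to signed first, where the subtraction itself can overflow. Standalone sketch:

    /* True if a is at or after b in modulo-2^32 sequence space. */
    static inline bool cnt32_ge(u32 a, u32 b)
    {
        return (s32)(a - b) >= 0;
    }

    /* e.g. cnt32_ge(2, 0xfffffffeU) is true: the counter wrapped,
     * and 2 is four steps after 0xfffffffe. */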
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 379d22e28ed6..a5569b7f47a3 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \
sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
-static struct kobj_type nilfs_##name##_ktype = { \
+static const struct kobj_type nilfs_##name##_ktype = { \
.default_groups = nilfs_##name##_groups, \
.sysfs_ops = &nilfs_##name##_attr_ops, \
.release = nilfs_##name##_attr_release, \
@@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = {
.store = nilfs_snapshot_attr_store,
};
-static struct kobj_type nilfs_snapshot_ktype = {
+static const struct kobj_type nilfs_snapshot_ktype = {
.default_groups = nilfs_snapshot_groups,
.sysfs_ops = &nilfs_snapshot_attr_ops,
.release = nilfs_snapshot_attr_release,
@@ -967,7 +967,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = {
.store = nilfs_dev_attr_store,
};
-static struct kobj_type nilfs_dev_ktype = {
+static const struct kobj_type nilfs_dev_ktype = {
.default_groups = nilfs_dev_groups,
.sysfs_ops = &nilfs_dev_attr_ops,
.release = nilfs_dev_attr_release,
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 8e6bcdf99770..6ede3e924dec 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -231,7 +231,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
struct ntfs_sb_info *sbi;
struct ATTRIB *attr_s;
struct MFT_REC *rec;
- u32 used, asize, rsize, aoff, align;
+ u32 used, asize, rsize, aoff;
bool is_data;
CLST len, alen;
char *next;
@@ -252,10 +252,13 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
rsize = le32_to_cpu(attr->res.data_size);
is_data = attr->type == ATTR_DATA && !attr->name_len;
- align = sbi->cluster_size;
- if (is_attr_compressed(attr))
- align <<= COMPRESSION_UNIT;
- len = (rsize + align - 1) >> sbi->cluster_bits;
+ /* len - how many clusters required to store 'rsize' bytes */
+ if (is_attr_compressed(attr)) {
+ u8 shift = sbi->cluster_bits + NTFS_LZNT_CUNIT;
+ len = ((rsize + (1u << shift) - 1) >> shift) << NTFS_LZNT_CUNIT;
+ } else {
+ len = bytes_to_cluster(sbi, rsize);
+ }
run_init(run);
@@ -285,22 +288,21 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
if (err)
goto out2;
} else if (!page) {
- char *kaddr;
-
- page = grab_cache_page(ni->vfs_inode.i_mapping, 0);
- if (!page) {
- err = -ENOMEM;
+ struct address_space *mapping = ni->vfs_inode.i_mapping;
+ struct folio *folio;
+
+ folio = __filemap_get_folio(
+ mapping, 0, FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto out2;
}
- kaddr = kmap_atomic(page);
- memcpy(kaddr, data, rsize);
- memset(kaddr + rsize, 0, PAGE_SIZE - rsize);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_fill_tail(folio, 0, data, rsize);
+ folio_mark_uptodate(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
}
}
@@ -670,7 +672,8 @@ pack_runs:
goto undo_2;
}
- if (!is_mft)
+ /* keep runs for $MFT::$ATTR_DATA and $MFT::$ATTR_BITMAP. */
+ if (ni->mi.rno != MFT_REC_MFT)
run_truncate_head(run, evcn + 1);
svcn = le64_to_cpu(attr->nres.svcn);
@@ -972,6 +975,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
if (err)
goto out;
+ /* Check for compressed frame. */
+ err = attr_is_frame_compressed(ni, attr, vcn >> NTFS_LZNT_CUNIT, &hint);
+ if (err)
+ goto out;
+
+ if (hint) {
+ /* if frame is compressed - don't touch it. */
+ *lcn = COMPRESSED_LCN;
+ *len = hint;
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
if (!*len) {
if (run_lookup_entry(run, vcn, lcn, len, NULL)) {
if (*lcn != SPARSE_LCN || !new)
@@ -1223,11 +1239,12 @@ undo1:
goto out;
}
-int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
+int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio)
{
u64 vbo;
struct ATTRIB *attr;
u32 data_size;
+ size_t len;
attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL);
if (!attr)
@@ -1236,30 +1253,20 @@ int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
if (attr->non_res)
return E_NTFS_NONRESIDENT;
- vbo = page->index << PAGE_SHIFT;
+ vbo = folio->index << PAGE_SHIFT;
data_size = le32_to_cpu(attr->res.data_size);
- if (vbo < data_size) {
- const char *data = resident_data(attr);
- char *kaddr = kmap_atomic(page);
- u32 use = data_size - vbo;
-
- if (use > PAGE_SIZE)
- use = PAGE_SIZE;
+ if (vbo > data_size)
+ len = 0;
+ else
+ len = min(data_size - vbo, folio_size(folio));
- memcpy(kaddr, data + vbo, use);
- memset(kaddr + use, 0, PAGE_SIZE - use);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- } else if (!PageUptodate(page)) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- }
+ folio_fill_tail(folio, 0, resident_data(attr) + vbo, len);
+ folio_mark_uptodate(folio);
return 0;
}
-int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
+int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio)
{
u64 vbo;
struct mft_inode *mi;
@@ -1275,17 +1282,13 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
return E_NTFS_NONRESIDENT;
}
- vbo = page->index << PAGE_SHIFT;
+ vbo = folio->index << PAGE_SHIFT;
data_size = le32_to_cpu(attr->res.data_size);
if (vbo < data_size) {
char *data = resident_data(attr);
- char *kaddr = kmap_atomic(page);
- u32 use = data_size - vbo;
+ size_t len = min(data_size - vbo, folio_size(folio));
- if (use > PAGE_SIZE)
- use = PAGE_SIZE;
- memcpy(data + vbo, kaddr, use);
- kunmap_atomic(kaddr);
+ memcpy_from_folio(data + vbo, folio, 0, len);
mi->dirty = true;
}
ni->i_valid = data_size;
@@ -1378,7 +1381,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
u32 voff;
u8 bytes_per_off;
char *addr;
- struct page *page;
+ struct folio *folio;
int i, err;
__le32 *off32;
__le64 *off64;
@@ -1423,18 +1426,18 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
wof_size = le64_to_cpu(attr->nres.data_size);
down_write(&ni->file.run_lock);
- page = ni->file.offs_page;
- if (!page) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
+ folio = ni->file.offs_folio;
+ if (!folio) {
+ folio = folio_alloc(GFP_KERNEL, 0);
+ if (!folio) {
err = -ENOMEM;
goto out;
}
- page->index = -1;
- ni->file.offs_page = page;
+ folio->index = -1;
+ ni->file.offs_folio = folio;
}
- lock_page(page);
- addr = page_address(page);
+ folio_lock(folio);
+ addr = folio_address(folio);
if (vbo[1]) {
voff = vbo[1] & (PAGE_SIZE - 1);
@@ -1450,7 +1453,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
do {
pgoff_t index = vbo[i] >> PAGE_SHIFT;
- if (index != page->index) {
+ if (index != folio->index) {
+ struct page *page = &folio->page;
u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1);
u64 to = min(from + PAGE_SIZE, wof_size);
@@ -1463,10 +1467,10 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
err = ntfs_bio_pages(sbi, run, &page, 1, from,
to - from, REQ_OP_READ);
if (err) {
- page->index = -1;
+ folio->index = -1;
goto out1;
}
- page->index = index;
+ folio->index = index;
}
if (i) {
@@ -1504,7 +1508,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
*ondisk_size = off[1] - off[0];
out1:
- unlock_page(page);
+ folio_unlock(folio);
out:
up_write(&ni->file.run_lock);
return err;
@@ -1722,6 +1726,7 @@ repack:
attr_b->nres.total_size = cpu_to_le64(total_size);
inode_set_bytes(&ni->vfs_inode, total_size);
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mi_b->dirty = true;
mark_inode_dirty(&ni->vfs_inode);
@@ -2356,8 +2361,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
}
- if (vbo > data_size) {
- /* Insert range after the file size is not allowed. */
+ if (vbo >= data_size) {
+ /*
+ * Inserting a range at or beyond the end of the file is not
+ * allowed: if the offset is equal to or greater than the file
+ * size, an error is returned. For such operations (i.e.,
+ * inserting a hole at the end of the file), ftruncate(2) should be used.
+ */
return -EINVAL;
}
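Several attrib.c hunks collapse open-coded kmap/memcpy/memset/flush sequences into folio_fill_tail(), which copies a buffer into the folio and zeroes everything after it. Roughly the following, as a sketch of the semantics rather than the mm implementation (which also handles highmem folios page by page):

    static void fill_tail_sketch(struct folio *folio, size_t offset,
                                 const char *from, size_t len)
    {
        char *to = kmap_local_folio(folio, 0);

        memcpy(to + offset, from, len);              /* payload */
        memset(to + offset + len, 0,
               folio_size(folio) - offset - len);    /* zero the tail */
        kunmap_local(to);
        flush_dcache_folio(folio);
    }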
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index c9eb01ccee51..cf4fe21a5039 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -1382,7 +1382,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
if (err)
- break;
+ return err;
bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
if (!bh)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 1937e8e612f8..fc6a8aa29e3a 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -272,9 +272,12 @@ out:
return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode;
}
-static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
- const struct NTFS_DE *e, u8 *name,
- struct dir_context *ctx)
+/*
+ * returns false if 'ctx' is full
+ */
+static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
+ struct ntfs_inode *ni, const struct NTFS_DE *e,
+ u8 *name, struct dir_context *ctx)
{
const struct ATTR_FILE_NAME *fname;
unsigned long ino;
@@ -284,29 +287,29 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
fname = Add2Ptr(e, sizeof(struct NTFS_DE));
if (fname->type == FILE_NAME_DOS)
- return 0;
+ return true;
if (!mi_is_ref(&ni->mi, &fname->home))
- return 0;
+ return true;
ino = ino_get(&e->ref);
if (ino == MFT_REC_ROOT)
- return 0;
+ return true;
/* Skip meta files. Unless option to show metafiles is set. */
if (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino))
- return 0;
+ return true;
if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
- return 0;
+ return true;
name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name,
PATH_MAX);
if (name_len <= 0) {
ntfs_warn(sbi->sb, "failed to convert name for inode %lx.",
ino);
- return 0;
+ return true;
}
/*
@@ -326,7 +329,8 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
* It does additional locks/reads just to get the type of name.
* Should we use additional mount option to enable branch below?
*/
- if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) ||
+ fname->dup.ea_size) &&
ino != ni->mi.rno) {
struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
if (!IS_ERR_OR_NULL(inode)) {
@@ -335,17 +339,20 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
}
}
- return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
+ return dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
/*
* ntfs_read_hdr - Helper function for ntfs_readdir().
+ *
+ * returns 0 if ok.
+ * returns -EINVAL if directory is corrupted.
+ * returns +1 if 'ctx' is full.
*/
static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
const struct INDEX_HDR *hdr, u64 vbo, u64 pos,
u8 *name, struct dir_context *ctx)
{
- int err;
const struct NTFS_DE *e;
u32 e_size;
u32 end = le32_to_cpu(hdr->used);
@@ -353,12 +360,12 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
for (;; off += e_size) {
if (off + sizeof(struct NTFS_DE) > end)
- return -1;
+ return -EINVAL;
e = Add2Ptr(hdr, off);
e_size = le16_to_cpu(e->size);
if (e_size < sizeof(struct NTFS_DE) || off + e_size > end)
- return -1;
+ return -EINVAL;
if (de_is_last(e))
return 0;
@@ -368,14 +375,15 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
continue;
if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME)
- return -1;
+ return -EINVAL;
ctx->pos = vbo + off;
/* Submit the name to the filldir callback. */
- err = ntfs_filldir(sbi, ni, e, name, ctx);
- if (err)
- return err;
+ if (!ntfs_dir_emit(sbi, ni, e, name, ctx)) {
+ /* ctx is full. */
+ return +1;
+ }
}
}
@@ -474,8 +482,6 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
vbo = (u64)bit << index_bits;
if (vbo >= i_size) {
- ntfs_inode_err(dir, "Looks like your dir is corrupt");
- ctx->pos = eod;
err = -EINVAL;
goto out;
}
@@ -498,9 +504,16 @@ out:
__putname(name);
put_indx_node(node);
- if (err == -ENOENT) {
+ if (err == 1) {
+ /* 'ctx' is full. */
+ err = 0;
+ } else if (err == -ENOENT) {
err = 0;
ctx->pos = pos;
+ } else if (err < 0) {
+ if (err == -EINVAL)
+ ntfs_inode_err(dir, "directory corrupted");
+ ctx->pos = eod;
}
return err;
@@ -618,10 +631,12 @@ const struct file_operations ntfs_dir_operations = {
#endif
};
+#if IS_ENABLED(CONFIG_NTFS_FS)
const struct file_operations ntfs_legacy_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = ntfs_readdir,
.open = ntfs_file_open,
};
+#endif
// clang-format on
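The readdir rework above hinges on dir_emit()'s contract: it returns false once the caller's buffer is full, which is a normal stop condition rather than an error. ntfs_read_hdr() therefore encodes three outcomes (0 done, -EINVAL corrupt directory, +1 buffer full), and ntfs_readdir() maps +1 back to success. Shape of a conforming emit loop (struct example_entry is hypothetical):

    static int emit_entries_sketch(struct dir_context *ctx,
                                   const struct example_entry *e, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            ctx->pos = e[i].pos;
            if (!dir_emit(ctx, e[i].name, e[i].len,
                          e[i].ino, e[i].type))
                return 1;    /* ctx full: stop cleanly */
        }
        return 0;
    }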
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 2f903b6ce157..ca1ddc46bd86 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -13,6 +13,7 @@
#include <linux/compat.h>
#include <linux/falloc.h>
#include <linux/fiemap.h>
+#include <linux/fileattr.h>
#include "debug.h"
#include "ntfs.h"
@@ -48,6 +49,65 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
return 0;
}
+/*
+ * ntfs_fileattr_get - inode_operations::fileattr_get
+ */
+int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct ntfs_inode *ni = ntfs_i(inode);
+ u32 flags = 0;
+
+ if (inode->i_flags & S_IMMUTABLE)
+ flags |= FS_IMMUTABLE_FL;
+
+ if (inode->i_flags & S_APPEND)
+ flags |= FS_APPEND_FL;
+
+ if (is_compressed(ni))
+ flags |= FS_COMPR_FL;
+
+ if (is_encrypted(ni))
+ flags |= FS_ENCRYPT_FL;
+
+ fileattr_fill_flags(fa, flags);
+
+ return 0;
+}
+
+/*
+ * ntfs_fileattr_set - inode_operations::fileattr_set
+ */
+int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ u32 flags = fa->flags;
+ unsigned int new_fl = 0;
+
+ if (fileattr_has_fsx(fa))
+ return -EOPNOTSUPP;
+
+ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL))
+ return -EOPNOTSUPP;
+
+ if (flags & FS_IMMUTABLE_FL)
+ new_fl |= S_IMMUTABLE;
+
+ if (flags & FS_APPEND_FL)
+ new_fl |= S_APPEND;
+
+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND);
+
+ inode_set_ctime_current(inode);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
+/*
+ * ntfs_ioctl - file_operations::unlocked_ioctl
+ */
long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -77,20 +137,27 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_inode(path->dentry);
struct ntfs_inode *ni = ntfs_i(inode);
+ stat->result_mask |= STATX_BTIME;
+ stat->btime = ni->i_crtime;
+ stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
+
+ if (inode->i_flags & S_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+
+ if (inode->i_flags & S_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+
if (is_compressed(ni))
stat->attributes |= STATX_ATTR_COMPRESSED;
if (is_encrypted(ni))
stat->attributes |= STATX_ATTR_ENCRYPTED;
- stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
+ stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED |
+ STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND;
generic_fillattr(idmap, request_mask, inode, stat);
- stat->result_mask |= STATX_BTIME;
- stat->btime = ni->i_crtime;
- stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
-
return 0;
}
@@ -196,9 +263,9 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
PAGE_SIZE;
iblock = page_off >> inode->i_blkbits;
- folio = __filemap_get_folio(mapping, idx,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- mapping_gfp_constraint(mapping, ~__GFP_FS));
+ folio = __filemap_get_folio(
+ mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -253,8 +320,7 @@ out:
*/
static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT);
bool rw = vma->vm_flags & VM_WRITE;
@@ -299,10 +365,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
}
if (ni->i_valid < to) {
- if (!inode_trylock(inode)) {
- err = -EAGAIN;
- goto out;
- }
+ inode_lock(inode);
err = ntfs_extend_initialized_size(file, ni,
ni->i_valid, to);
inode_unlock(inode);
@@ -431,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
*/
static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
{
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
struct ntfs_sb_info *sbi = sb->s_fs_info;
@@ -744,7 +807,7 @@ out:
static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
@@ -781,7 +844,7 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- struct inode *inode = in->f_mapping->host;
+ struct inode *inode = file_inode(in);
struct ntfs_inode *ni = ntfs_i(inode);
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
@@ -824,23 +887,25 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index,
*frame_uptodate = true;
for (npages = 0; npages < pages_per_frame; npages++, index++) {
- struct page *page;
+ struct folio *folio;
- page = find_or_create_page(mapping, index, gfp_mask);
- if (!page) {
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ gfp_mask);
+ if (IS_ERR(folio)) {
while (npages--) {
- page = pages[npages];
- unlock_page(page);
- put_page(page);
+ folio = page_folio(pages[npages]);
+ folio_unlock(folio);
+ folio_put(folio);
}
return -ENOMEM;
}
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
*frame_uptodate = false;
- pages[npages] = page;
+ pages[npages] = &folio->page;
}
return 0;
@@ -1075,8 +1140,7 @@ out:
static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = file_inode(file);
ssize_t ret;
int err;
struct ntfs_inode *ni = ntfs_i(inode);
@@ -1198,7 +1262,7 @@ static int ntfs_file_release(struct inode *inode, struct file *file)
}
/*
- * ntfs_fiemap - file_operations::fiemap
+ * ntfs_fiemap - inode_operations::fiemap
*/
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
@@ -1227,6 +1291,8 @@ const struct inode_operations ntfs_file_inode_operations = {
.get_acl = ntfs_get_acl,
.set_acl = ntfs_set_acl,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
+ .fileattr_set = ntfs_fileattr_set,
};
const struct file_operations ntfs_file_operations = {
@@ -1246,6 +1312,7 @@ const struct file_operations ntfs_file_operations = {
.release = ntfs_file_release,
};
+#if IS_ENABLED(CONFIG_NTFS_FS)
const struct file_operations ntfs_legacy_file_operations = {
.llseek = generic_file_llseek,
.read_iter = ntfs_file_read_iter,
@@ -1253,4 +1320,5 @@ const struct file_operations ntfs_legacy_file_operations = {
.open = ntfs_file_open,
.release = ntfs_file_release,
};
+#endif
// clang-format on
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 0008670939a4..a469c608a394 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -122,10 +122,10 @@ void ni_clear(struct ntfs_inode *ni)
else {
run_close(&ni->file.run);
#ifdef CONFIG_NTFS3_LZX_XPRESS
- if (ni->file.offs_page) {
+ if (ni->file.offs_folio) {
/* On-demand allocated page for offsets. */
- put_page(ni->file.offs_page);
- ni->file.offs_page = NULL;
+ folio_put(ni->file.offs_folio);
+ ni->file.offs_folio = NULL;
}
#endif
}
@@ -1501,7 +1501,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
if (is_ext) {
if (flags & ATTR_FLAG_COMPRESSED)
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
attr->nres.total_size = attr->nres.alloc_size;
}
@@ -1601,8 +1601,10 @@ int ni_delete_all(struct ntfs_inode *ni)
asize = le32_to_cpu(attr->size);
roff = le16_to_cpu(attr->nres.run_off);
- if (roff > asize)
+ if (roff > asize) {
+ _ntfs_bad_inode(&ni->vfs_inode);
return -EINVAL;
+ }
/* run==1 means unpack and deallocate. */
run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
@@ -1897,6 +1899,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
}
/*
+ * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent
+ * that accepts a kernel address for fi_extents_start
+ */
+static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo,
+ u64 logical, u64 phys, u64 len, u32 flags)
+{
+ struct fiemap_extent extent;
+ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
+
+ /* only count the extents */
+ if (fieinfo->fi_extents_max == 0) {
+ fieinfo->fi_extents_mapped++;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+ }
+
+ if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+ return 1;
+
+ if (flags & FIEMAP_EXTENT_DELALLOC)
+ flags |= FIEMAP_EXTENT_UNKNOWN;
+ if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
+ flags |= FIEMAP_EXTENT_ENCODED;
+ if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
+ flags |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+ memset(&extent, 0, sizeof(extent));
+ extent.fe_logical = logical;
+ extent.fe_physical = phys;
+ extent.fe_length = len;
+ extent.fe_flags = flags;
+
+ dest += fieinfo->fi_extents_mapped;
+ memcpy(dest, &extent, sizeof(extent));
+
+ fieinfo->fi_extents_mapped++;
+ if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
+ return 1;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+}
+
+/*
* ni_fiemap - Helper for file_fiemap().
*
* Assumed ni_lock.
@@ -1906,6 +1949,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
__u64 vbo, __u64 len)
{
int err = 0;
+ struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start;
+ struct fiemap_extent *fe_k = NULL;
struct ntfs_sb_info *sbi = ni->mi.sbi;
u8 cluster_bits = sbi->cluster_bits;
struct runs_tree *run;
@@ -1953,6 +1998,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
goto out;
}
+ /*
+ * To avoid lock problems, temporarily replace the pointer to user memory with a pointer to kernel memory.
+ */
+ fe_k = kmalloc_array(fieinfo->fi_extents_max,
+ sizeof(struct fiemap_extent),
+ GFP_NOFS | __GFP_ZERO);
+ if (!fe_k) {
+ err = -ENOMEM;
+ goto out;
+ }
+ fieinfo->fi_extents_start = fe_k;
+
end = vbo + len;
alloc_size = le64_to_cpu(attr->nres.alloc_size);
if (end > alloc_size)
@@ -2041,8 +2098,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
if (vbo + dlen >= end)
flags |= FIEMAP_EXTENT_LAST;
- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
- flags);
+ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen,
+ flags);
+
if (err < 0)
break;
if (err == 1) {
@@ -2062,7 +2120,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
if (vbo + bytes >= end)
flags |= FIEMAP_EXTENT_LAST;
- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
+ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes,
+ flags);
if (err < 0)
break;
if (err == 1) {
@@ -2075,7 +2134,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
up_read(run_lock);
+ /*
+ * Copy to user memory outside of the lock.
+ */
+ if (copy_to_user(fe_u, fe_k,
+ fieinfo->fi_extents_max *
+ sizeof(struct fiemap_extent))) {
+ err = -EFAULT;
+ }
+
out:
+ /* Restore original pointer. */
+ fieinfo->fi_extents_start = fe_u;
+ kfree(fe_k);
return err;
}
@@ -2085,12 +2156,12 @@ out:
* When decompressing, we typically obtain more than one page per reference.
* We inject the additional pages into the page cache.
*/
-int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio)
{
int err;
struct ntfs_sb_info *sbi = ni->mi.sbi;
- struct address_space *mapping = page->mapping;
- pgoff_t index = page->index;
+ struct address_space *mapping = folio->mapping;
+ pgoff_t index = folio->index;
u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT;
struct page **pages = NULL; /* Array of at most 16 pages. stack? */
u8 frame_bits;
@@ -2100,7 +2171,8 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
struct page *pg;
if (vbo >= i_size_read(&ni->vfs_inode)) {
- SetPageUptodate(page);
+ folio_zero_range(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
err = 0;
goto out;
}
@@ -2124,7 +2196,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
goto out;
}
- pages[idx] = page;
+ pages[idx] = &folio->page;
index = frame_vbo >> PAGE_SHIFT;
gfp_mask = mapping_gfp_mask(mapping);
@@ -2143,9 +2215,6 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame);
out1:
- if (err)
- SetPageError(page);
-
for (i = 0; i < pages_per_frame; i++) {
pg = pages[i];
if (i == idx || !pg)
@@ -2157,7 +2226,7 @@ out1:
out:
/* At this point, err contains 0 or -EIO depending on the "critical" page. */
kfree(pages);
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
@@ -2362,9 +2431,9 @@ remove_wof:
/* Clear cached flag. */
ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK;
- if (ni->file.offs_page) {
- put_page(ni->file.offs_page);
- ni->file.offs_page = NULL;
+ if (ni->file.offs_folio) {
+ folio_put(ni->file.offs_folio);
+ ni->file.offs_folio = NULL;
}
mapping->a_ops = &ntfs_aops;
@@ -2718,7 +2787,6 @@ out:
for (i = 0; i < pages_per_frame; i++) {
pg = pages[i];
kunmap(pg);
- ClearPageError(pg);
SetPageUptodate(pg);
}
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index d7807d255dfe..c64dd114ac65 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -724,7 +724,8 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
if (!rsize || rsize > bytes ||
rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
- le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
+ le16_to_cpu(rt->total) > ne || ff > ts - sizeof(__le32) ||
+ lf > ts - sizeof(__le32) ||
(ff && ff < sizeof(struct RESTART_TABLE)) ||
(lf && lf < sizeof(struct RESTART_TABLE))) {
return false;
@@ -754,6 +755,9 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
return false;
off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
+
+ if (off > ts - sizeof(__le32))
+ return false;
}
return true;
@@ -2992,7 +2996,7 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
if (is_ext) {
attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
if (is_attr_compressed(attr))
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
attr->nres.run_off =
cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
@@ -3722,6 +3726,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
struct ATTR_NAME_ENTRY *attr_names = NULL;
+ u32 attr_names_bytes = 0;
+ u32 oatbl_bytes = 0;
struct RESTART_TABLE *dptbl = NULL;
struct RESTART_TABLE *trtbl = NULL;
const struct RESTART_TABLE *rt;
@@ -3736,6 +3742,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct NTFS_RESTART *rst = NULL;
struct lcb *lcb = NULL;
struct OPEN_ATTR_ENRTY *oe;
+ struct ATTR_NAME_ENTRY *ane;
struct TRANSACTION_ENTRY *tr;
struct DIR_PAGE_ENTRY *dp;
u32 i, bytes_per_attr_entry;
@@ -3915,6 +3922,9 @@ check_restart_area:
goto out;
}
+ log->page_mask = log->page_size - 1;
+ log->page_bits = blksize_bits(log->page_size);
+
/* If the file size has shrunk then we won't mount it. */
if (log->l_size < le64_to_cpu(ra2->l_size)) {
err = -EINVAL;
@@ -4104,7 +4114,7 @@ process_log:
/* Allocate and Read the Transaction Table. */
if (!rst->transact_table_len)
- goto check_dirty_page_table;
+ goto check_dirty_page_table; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->transact_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4144,7 +4154,7 @@ process_log:
check_dirty_page_table:
/* The next record back should be the Dirty Pages Table. */
if (!rst->dirty_pages_len)
- goto check_attribute_names;
+ goto check_attribute_names; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4180,7 +4190,7 @@ check_dirty_page_table:
/* Convert Ra version '0' into version '1'. */
if (rst->major_ver)
- goto end_conv_1;
+ goto end_conv_1; /* reduce tab pressure. */
dp = NULL;
while ((dp = enum_rstbl(dptbl, dp))) {
@@ -4200,8 +4210,7 @@ end_conv_1:
* remembering the oldest lsn values.
*/
if (sbi->cluster_size <= log->page_size)
- goto trace_dp_table;
-
+ goto trace_dp_table; /* reduce tab pressure. */
dp = NULL;
while ((dp = enum_rstbl(dptbl, dp))) {
struct DIR_PAGE_ENTRY *next = dp;
@@ -4222,7 +4231,7 @@ trace_dp_table:
check_attribute_names:
/* The next record should be the Attribute Names. */
if (!rst->attr_names_len)
- goto check_attr_table;
+ goto check_attr_table; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->attr_names_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4240,9 +4249,9 @@ check_attribute_names:
}
t32 = lrh_length(lrh);
- rec_len -= t32;
+ attr_names_bytes = rec_len - t32;
- attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
+ attr_names = kmemdup(Add2Ptr(lrh, t32), attr_names_bytes, GFP_NOFS);
if (!attr_names) {
err = -ENOMEM;
goto out;
@@ -4254,7 +4263,7 @@ check_attribute_names:
check_attr_table:
/* The next record should be the attribute Table. */
if (!rst->open_attr_len)
- goto check_attribute_names2;
+ goto check_attribute_names2; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->open_attr_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4274,14 +4283,14 @@ check_attr_table:
t16 = le16_to_cpu(lrh->redo_off);
rt = Add2Ptr(lrh, t16);
- t32 = rec_len - t16;
+ oatbl_bytes = rec_len - t16;
- if (!check_rstbl(rt, t32)) {
+ if (!check_rstbl(rt, oatbl_bytes)) {
err = -EINVAL;
goto out;
}
- oatbl = kmemdup(rt, t32, GFP_NOFS);
+ oatbl = kmemdup(rt, oatbl_bytes, GFP_NOFS);
if (!oatbl) {
err = -ENOMEM;
goto out;
@@ -4314,17 +4323,40 @@ check_attr_table:
lcb = NULL;
check_attribute_names2:
- if (rst->attr_names_len && oatbl) {
- struct ATTR_NAME_ENTRY *ane = attr_names;
- while (ane->off) {
+ if (attr_names && oatbl) {
+ off = 0;
+ for (;;) {
+ /* Check we can use attribute name entry 'ane'. */
+ static_assert(sizeof(*ane) == 4);
+ if (off + sizeof(*ane) > attr_names_bytes) {
+ /* Just ignore the rest. */
+ break;
+ }
+
+ ane = Add2Ptr(attr_names, off);
+ t16 = le16_to_cpu(ane->off);
+ if (!t16) {
+ /* This is the only valid exit. */
+ break;
+ }
+
+ /* Check we can use open attribute entry 'oe'. */
+ if (t16 + sizeof(*oe) > oatbl_bytes) {
+ /* Just ignore the rest. */
+ break;
+ }
+
/* TODO: Clear table on exit! */
- oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+ oe = Add2Ptr(oatbl, t16);
t16 = le16_to_cpu(ane->name_bytes);
+ off += t16 + sizeof(*ane);
+ if (off > attr_names_bytes) {
+ /* Just ignore the rest. */
+ break;
+ }
oe->name_len = t16 / sizeof(short);
oe->ptr = ane->name;
oe->is_attr_name = 2;
- ane = Add2Ptr(ane,
- sizeof(struct ATTR_NAME_ENTRY) + t16);
}
}
@@ -4520,7 +4552,6 @@ copy_lcns:
}
}
goto next_log_record_analyze;
- ;
}
case OpenNonresidentAttribute:
@@ -4659,7 +4690,7 @@ end_log_records_enumerate:
* table are not empty.
*/
if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
- goto end_reply;
+ goto end_replay;
sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
if (is_ro)
@@ -5088,7 +5119,7 @@ undo_action_done:
sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
-end_reply:
+end_replay:
err = 0;
if (is_ro)
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 626d3f2c7e2d..0fa636038b4e 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -2650,8 +2650,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
{
int err;
struct ATTRIB *attr;
+ u32 uni_bytes;
struct ntfs_inode *ni = sbi->volume.ni;
- const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */
/* Allocate PATH_MAX bytes. */
struct cpu_str *uni = __getname();
@@ -2663,7 +2663,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
if (err < 0)
goto out;
- if (uni->len > max_ulen) {
+ uni_bytes = uni->len * sizeof(u16);
+ if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) {
ntfs_warn(sbi->sb, "new label is too long");
err = -EFBIG;
goto out;
@@ -2674,13 +2675,13 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
/* Ignore any errors. */
ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL);
- err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL,
- 0, &attr, NULL, NULL);
+ err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr,
+ NULL, NULL);
if (err < 0)
goto unlock_out;
/* write new label in on-disk struct. */
- memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16));
+ memcpy(resident_data(attr), uni->name, uni_bytes);
/* update cached value of current label. */
if (len >= ARRAY_SIZE(sbi->volume.label))
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index d0f15bbf78f6..9089c58a005c 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -978,7 +978,7 @@ static struct indx_node *indx_new(struct ntfs_index *indx,
hdr->used =
cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64));
de_set_vbn_le(e, *sub_vbn);
- hdr->flags = 1;
+ hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES;
} else {
e->size = cpu_to_le16(sizeof(struct NTFS_DE));
hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE));
@@ -1683,7 +1683,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST;
- hdr->flags = 1;
+ hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES;
hdr->used = hdr->total =
cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr));
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 0f1664db94ad..6b0bdc474e76 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -18,7 +18,7 @@
#include "ntfs_fs.h"
/*
- * ntfs_read_mft - Read record and parses MFT.
+ * ntfs_read_mft - Read record and parse MFT.
*/
static struct inode *ntfs_read_mft(struct inode *inode,
const struct cpu_str *name,
@@ -441,10 +441,9 @@ end_enum:
* Usually a hard links to directories are disabled.
*/
inode->i_op = &ntfs_dir_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_dir_operations;
- else
- inode->i_fop = &ntfs_dir_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_dir_operations :
+ &ntfs_dir_operations;
ni->i_valid = 0;
} else if (S_ISLNK(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
@@ -454,10 +453,9 @@ end_enum:
} else if (S_ISREG(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
inode->i_op = &ntfs_file_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_file_operations;
- else
- inode->i_fop = &ntfs_file_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_file_operations :
+ &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
if (ino != MFT_REC_MFT)
@@ -580,10 +578,11 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
bh->b_blocknr = RESIDENT_LCN;
bh->b_size = block_size;
if (!folio) {
+ /* Direct I/O (read) or bmap call. */
err = 0;
} else {
ni_lock(ni);
- err = attr_data_read_resident(ni, &folio->page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (!err)
@@ -710,25 +709,24 @@ static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
static int ntfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
int err;
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
if (is_resident(ni)) {
ni_lock(ni);
- err = attr_data_read_resident(ni, page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (err != E_NTFS_NONRESIDENT) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
}
if (is_compressed(ni)) {
ni_lock(ni);
- err = ni_readpage_cmpr(ni, page);
+ err = ni_readpage_cmpr(ni, folio);
ni_unlock(ni);
return err;
}
@@ -872,7 +870,7 @@ static int ntfs_resident_writepage(struct folio *folio,
return -EIO;
ni_lock(ni);
- ret = attr_data_write_resident(ni, &folio->page);
+ ret = attr_data_write_resident(ni, folio);
ni_unlock(ni);
if (ret != E_NTFS_NONRESIDENT)
@@ -914,24 +912,25 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
*pagep = NULL;
if (is_resident(ni)) {
- struct page *page =
- grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT);
+ struct folio *folio = __filemap_get_folio(
+ mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
- if (!page) {
- err = -ENOMEM;
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto out;
}
ni_lock(ni);
- err = attr_data_read_resident(ni, page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (!err) {
- *pagep = page;
+ *pagep = &folio->page;
goto out;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (err != E_NTFS_NONRESIDENT)
goto out;
@@ -950,6 +949,7 @@ out:
int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
u32 len, u32 copied, struct page *page, void *fsdata)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
@@ -958,26 +958,26 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
if (is_resident(ni)) {
ni_lock(ni);
- err = attr_data_write_resident(ni, page);
+ err = attr_data_write_resident(ni, folio);
ni_unlock(ni);
if (!err) {
+ struct buffer_head *head = folio_buffers(folio);
dirty = true;
- /* Clear any buffers in page. */
- if (page_has_buffers(page)) {
- struct buffer_head *head, *bh;
+ /* Clear any buffers in folio. */
+ if (head) {
+ struct buffer_head *bh = head;
- bh = head = page_buffers(page);
do {
clear_buffer_dirty(bh);
clear_buffer_mapped(bh);
set_buffer_uptodate(bh);
} while (head != (bh = bh->b_this_page));
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
err = copied;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
} else {
err = generic_write_end(file, mapping, pos, len, copied, page,
fsdata);
@@ -1093,33 +1093,31 @@ int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
if (!ret && i2)
ret = writeback_inode(i2);
if (!ret)
- ret = sync_blockdev_nowait(sb->s_bdev);
+ ret = filemap_flush(sb->s_bdev_file->f_mapping);
return ret;
}
-int inode_write_data(struct inode *inode, const void *data, size_t bytes)
+/*
+ * Helper function to read a file's content into a kernel buffer.
+ */
+int inode_read_data(struct inode *inode, void *data, size_t bytes)
{
pgoff_t idx;
+ struct address_space *mapping = inode->i_mapping;
- /* Write non resident data. */
for (idx = 0; bytes; idx++) {
size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
+ struct page *page = read_mapping_page(mapping, idx, NULL);
+ void *kaddr;
if (IS_ERR(page))
return PTR_ERR(page);
- lock_page(page);
- WARN_ON(!PageUptodate(page));
- ClearPageUptodate(page);
-
- memcpy(page_address(page), data, op);
-
- flush_dcache_page(page);
- SetPageUptodate(page);
- unlock_page(page);
+ kaddr = kmap_atomic(page);
+ memcpy(data, kaddr, op);
+ kunmap_atomic(kaddr);
- ntfs_unmap_page(page);
+ put_page(page);
bytes -= op;
data = Add2Ptr(data, PAGE_SIZE);
@@ -1508,7 +1506,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
attr->flags = ATTR_FLAG_COMPRESSED;
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
asize = SIZEOF_NONRESIDENT_EX + 8;
} else {
attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8);
@@ -1559,7 +1557,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
/*
* Below function 'ntfs_save_wsl_perm' requires 0x78 bytes.
- * It is good idea to keep extened attributes resident.
+ * It is a good idea to keep extended attributes resident.
*/
if (asize + t16 + 0x78 + 8 > sbi->record_size) {
CLST alen;
@@ -1628,10 +1626,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
if (S_ISDIR(mode)) {
inode->i_op = &ntfs_dir_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_dir_operations;
- else
- inode->i_fop = &ntfs_dir_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_dir_operations :
+ &ntfs_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &ntfs_link_inode_operations;
inode->i_fop = NULL;
@@ -1640,10 +1637,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
inode_nohighmem(inode);
} else if (S_ISREG(mode)) {
inode->i_op = &ntfs_file_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_file_operations;
- else
- inode->i_fop = &ntfs_file_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_file_operations :
+ &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
init_rwsem(&ni->file.run_lock);
@@ -1668,7 +1664,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
* The packed size of extended attribute is stored in direntry too.
* 'fname' here points to inside new_de.
*/
- ntfs_save_wsl_perm(inode, &fname->dup.ea_size);
+ err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size);
+ if (err)
+ goto out6;
/*
* update ea_size in file_name attribute too.
@@ -1712,6 +1710,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
goto out2;
out6:
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_EA, NULL, 0, NULL, NULL);
+ if (attr && attr->non_res) {
+ /* Delete ATTR_EA, if non-resident. */
+ attr_set_size(ni, ATTR_EA, NULL, 0, NULL, 0, NULL, false, NULL);
+ }
+
if (rp_inserted)
ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref);
@@ -2133,5 +2137,6 @@ const struct address_space_operations ntfs_aops = {
const struct address_space_operations ntfs_aops_cmpr = {
.read_folio = ntfs_read_folio,
.readahead = ntfs_readahead,
+ .dirty_folio = block_dirty_folio,
};
// clang-format on
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index 71498421ce60..f16d318c4372 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -112,9 +112,7 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir,
}
/*
- * ntfs_mknod
- *
- * inode_operations::mknod
+ * ntfs_mknod - inode_operations::mknod
*/
static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
@@ -509,6 +507,8 @@ const struct inode_operations ntfs_dir_inode_operations = {
.getattr = ntfs_getattr,
.listxattr = ntfs_listxattr,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
+ .fileattr_set = ntfs_fileattr_set,
};
const struct inode_operations ntfs_special_inode_operations = {
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 3d6143c7abc0..241f2ffdd920 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -82,9 +82,6 @@ typedef u32 CLST;
#define RESIDENT_LCN ((CLST)-2)
#define COMPRESSED_LCN ((CLST)-3)
-#define COMPRESSION_UNIT 4
-#define COMPRESS_MAX_CLUSTER 0x1000
-
enum RECORD_NUM {
MFT_REC_MFT = 0,
MFT_REC_MIRR = 1,
@@ -696,14 +693,15 @@ static inline bool de_has_vcn_ex(const struct NTFS_DE *e)
offsetof(struct ATTR_FILE_NAME, name) + \
NTFS_NAME_LEN * sizeof(short), 8)
+#define NTFS_INDEX_HDR_HAS_SUBNODES cpu_to_le32(1)
+
struct INDEX_HDR {
__le32 de_off; // 0x00: The offset from the start of this structure
// to the first NTFS_DE.
__le32 used; // 0x04: The size of this structure plus all
// entries (quad-word aligned).
__le32 total; // 0x08: The allocated size of for this structure plus all entries.
- u8 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory.
- u8 res[3];
+ __le32 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory.
//
// de_off + used <= total
@@ -751,7 +749,7 @@ static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr,
static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr)
{
- return hdr->flags & 1;
+ return hdr->flags & NTFS_INDEX_HDR_HAS_SUBNODES;
}
struct INDEX_BUFFER {
@@ -771,7 +769,7 @@ static inline bool ib_is_empty(const struct INDEX_BUFFER *ib)
static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib)
{
- return !(ib->ihdr.flags & 1);
+ return !(ib->ihdr.flags & NTFS_INDEX_HDR_HAS_SUBNODES);
}
/* Index root structure ( 0x90 ). */
@@ -1002,9 +1000,6 @@ struct REPARSE_POINT {
static_assert(sizeof(struct REPARSE_POINT) == 0x18);
-/* Maximum allowed size of the reparse data. */
-#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE (16 * 1024)
-
/*
* The value of the following constant needs to satisfy the following
* conditions:
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index f9ed6d2b065d..e5255a251929 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -383,7 +383,7 @@ struct ntfs_inode {
struct rw_semaphore run_lock;
struct runs_tree run;
#ifdef CONFIG_NTFS3_LZX_XPRESS
- struct page *offs_page;
+ struct folio *offs_folio;
#endif
} file;
};
@@ -434,8 +434,8 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
struct ATTRIB **ret);
int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
CLST *len, bool *new, bool zero);
-int attr_data_read_resident(struct ntfs_inode *ni, struct page *page);
-int attr_data_write_resident(struct ntfs_inode *ni, struct page *page);
+int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio);
+int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio);
int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len, struct runs_tree *run,
CLST vcn);
@@ -497,6 +497,9 @@ extern const struct file_operations ntfs_dir_operations;
extern const struct file_operations ntfs_legacy_dir_operations;
/* Globals from file.c */
+int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct fileattr *fa);
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, u32 flags);
int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
@@ -564,7 +567,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint);
#define _ni_write_inode(i, w) ni_write_inode(i, w, __func__)
int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
__u64 vbo, __u64 len);
-int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page);
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio);
int ni_decompress_file(struct ntfs_inode *ni);
int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
u32 pages_per_frame);
@@ -716,7 +719,7 @@ int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
int ntfs_sync_inode(struct inode *inode);
int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
-int inode_write_data(struct inode *inode, const void *data, size_t bytes);
+int inode_read_data(struct inode *inode, void *data, size_t bytes);
int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const struct cpu_str *uni,
umode_t mode, dev_t dev, const char *symname, u32 size,
@@ -910,22 +913,6 @@ static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno)
rno == sbi->usn_jrnl_no;
}
-static inline void ntfs_unmap_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
-static inline struct page *ntfs_map_page(struct address_space *mapping,
- unsigned long index)
-{
- struct page *page = read_mapping_page(mapping, index, NULL);
-
- if (!IS_ERR(page))
- kmap(page);
- return page;
-}
-
static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd)
{
return wnd->zone_bit;
@@ -1156,6 +1143,13 @@ static inline void le64_sub_cpu(__le64 *var, u64 val)
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+#if IS_ENABLED(CONFIG_NTFS_FS)
bool is_legacy_ntfs(struct super_block *sb);
+#else
+static inline bool is_legacy_ntfs(struct super_block *sb)
+{
+ return false;
+}
+#endif
#endif /* _LINUX_NTFS3_NTFS_FS_H */
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index c5b688c5f984..a8758b85803f 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -275,7 +275,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("acl", Opt_acl),
fsparam_string("iocharset", Opt_iocharset),
fsparam_flag_no("prealloc", Opt_prealloc),
- fsparam_flag_no("nocase", Opt_nocase),
+ fsparam_flag_no("case", Opt_nocase),
{}
};
// clang-format on
@@ -464,7 +464,7 @@ static int ntfs3_volinfo(struct seq_file *m, void *o)
struct super_block *sb = m->private;
struct ntfs_sb_info *sbi = sb->s_fs_info;
- seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n",
+ seq_printf(m, "ntfs%d.%d\n%u\n%zu\n%zu\n%zu\n%s\n%s\n",
sbi->volume.major_ver, sbi->volume.minor_ver,
sbi->cluster_size, sbi->used.bitmap.nbits,
sbi->mft.bitmap.nbits,
@@ -1159,7 +1159,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
CLST vcn, lcn, len;
struct ATTRIB *attr;
const struct VOLUME_INFO *info;
- u32 idx, done, bytes;
+ u32 done, bytes;
struct ATTR_DEF_ENTRY *t;
u16 *shared;
struct MFT_REF ref;
@@ -1201,7 +1201,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
/*
* Load $Volume. This should be done before $LogFile
- * 'cause 'sbi->volume.ni' is used 'ntfs_set_state'.
+ * because 'sbi->volume.ni' is used in 'ntfs_set_state'.
*/
ref.low = cpu_to_le32(MFT_REC_VOL);
ref.seq = cpu_to_le16(MFT_REC_VOL);
@@ -1431,31 +1431,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto put_inode_out;
}
- for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) {
- unsigned long tail = bytes - done;
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
+ /* Read the entire file. */
+ err = inode_read_data(inode, sbi->def_table, bytes);
+ if (err) {
+ ntfs_err(sb, "Failed to read $AttrDef (%d).", err);
+ goto put_inode_out;
+ }
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- ntfs_err(sb, "Failed to read $AttrDef (%d).", err);
- goto put_inode_out;
- }
- memcpy(Add2Ptr(t, done), page_address(page),
- min(PAGE_SIZE, tail));
- ntfs_unmap_page(page);
-
- if (!idx && ATTR_STD != t->type) {
- ntfs_err(sb, "$AttrDef is corrupted.");
- err = -EINVAL;
- goto put_inode_out;
- }
+ if (ATTR_STD != t->type) {
+ ntfs_err(sb, "$AttrDef is corrupted.");
+ err = -EINVAL;
+ goto put_inode_out;
}
t += 1;
sbi->def_entries = 1;
done = sizeof(struct ATTR_DEF_ENTRY);
- sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE;
- sbi->ea_max_size = 0x10000; /* default formatter value */
while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) {
u32 t32 = le32_to_cpu(t->type);
@@ -1491,27 +1482,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto put_inode_out;
}
- for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) {
- const __le16 *src;
- u16 *dst = Add2Ptr(sbi->upcase, idx << PAGE_SHIFT);
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
-
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- ntfs_err(sb, "Failed to read $UpCase (%d).", err);
- goto put_inode_out;
- }
-
- src = page_address(page);
+ /* Read the entire file. */
+ err = inode_read_data(inode, sbi->upcase, 0x10000 * sizeof(short));
+ if (err) {
+ ntfs_err(sb, "Failed to read $UpCase (%d).", err);
+ goto put_inode_out;
+ }
#ifdef __BIG_ENDIAN
- for (i = 0; i < PAGE_SIZE / sizeof(u16); i++)
+ {
+ const __le16 *src = sbi->upcase;
+ u16 *dst = sbi->upcase;
+
+ for (i = 0; i < 0x10000; i++)
*dst++ = le16_to_cpu(*src++);
-#else
- memcpy(dst, src, PAGE_SIZE);
-#endif
- ntfs_unmap_page(page);
}
+#endif
shared = ntfs_set_shared(sbi->upcase, 0x10000 * sizeof(short));
if (shared && sbi->upcase != shared) {
@@ -1847,10 +1833,8 @@ bool is_legacy_ntfs(struct super_block *sb)
#else
static inline void register_as_ntfs_legacy(void) {}
static inline void unregister_as_ntfs_legacy(void) {}
-bool is_legacy_ntfs(struct super_block *sb) { return false; }
#endif
-
// clang-format on
static int __init init_ntfs_fs(void)
@@ -1876,8 +1860,7 @@ static int __init init_ntfs_fs(void)
ntfs_inode_cachep = kmem_cache_create(
"ntfs_inode_cache", sizeof(struct ntfs_inode), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT),
- init_once);
+ (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once);
if (!ntfs_inode_cachep) {
err = -ENOMEM;
goto out1;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 73785dece7a7..0703e1ae32b2 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -195,10 +195,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
{
const struct EA_INFO *info;
struct EA_FULL *ea_all = NULL;
- const struct EA_FULL *ea;
u32 off, size;
int err;
- int ea_size;
size_t ret;
err = ntfs_read_ea(ni, &ea_all, 0, &info);
@@ -212,16 +210,18 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
/* Enumerate all xattrs. */
ret = 0;
- for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) {
- ea = Add2Ptr(ea_all, off);
- ea_size = unpacked_ea_size(ea);
+ off = 0;
+ while (off + sizeof(struct EA_FULL) < size) {
+ const struct EA_FULL *ea = Add2Ptr(ea_all, off);
+ int ea_size = unpacked_ea_size(ea);
+ u8 name_len = ea->name_len;
- if (!ea->name_len)
+ if (!name_len)
break;
- if (ea->name_len > ea_size) {
+ if (name_len > ea_size) {
ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
- err = -EINVAL; /* corrupted fs */
+ err = -EINVAL; /* corrupted fs. */
break;
}
@@ -230,16 +230,17 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
if (off + ea_size > size)
break;
- if (ret + ea->name_len + 1 > bytes_per_buffer) {
+ if (ret + name_len + 1 > bytes_per_buffer) {
err = -ERANGE;
goto out;
}
- memcpy(buffer + ret, ea->name, ea->name_len);
- buffer[ret + ea->name_len] = 0;
+ memcpy(buffer + ret, ea->name, name_len);
+ buffer[ret + name_len] = 0;
}
- ret += ea->name_len + 1;
+ ret += name_len + 1;
+ off += ea_size;
}
out:
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d620d4c53c6f..f0beb173dbba 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -294,13 +294,16 @@ out:
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
*/
-static int ocfs2_check_dir_entry(struct inode * dir,
- struct ocfs2_dir_entry * de,
- struct buffer_head * bh,
+static int ocfs2_check_dir_entry(struct inode *dir,
+ struct ocfs2_dir_entry *de,
+ struct buffer_head *bh,
+ char *buf,
+ unsigned int size,
unsigned long offset)
{
const char *error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len);
+ const unsigned long next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(
- ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
- error_msg = "directory entry across blocks";
+ else if (unlikely(next_offset > size))
+ error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
+ next_offset != size)
+ error_msg = "directory entry too close to end";
if (unlikely(error_msg != NULL))
mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
@@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
de_buf = first_de;
dlimit = de_buf + bytes;
- while (de_buf < dlimit) {
+ while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
de = (struct ocfs2_dir_entry *) de_buf;
- if (de_buf + namelen <= dlimit &&
+ if (de->name + namelen <= dlimit &&
ocfs2_match(namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de,
+ bytes, offset)) {
ret = -1;
goto bail;
}
@@ -1138,7 +1144,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
pde = NULL;
de = (struct ocfs2_dir_entry *) first_de;
while (i < bytes) {
- if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
status = -EIO;
mlog_errno(status);
goto bail;
@@ -1635,7 +1641,8 @@ int __ocfs2_add_entry(handle_t *handle,
/* These checks should've already been passed by the
* prepare function, but I guess we can leave them
* here anyway. */
- if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start,
+ size, offset)) {
retval = -ENOENT;
goto bail;
}
@@ -1774,7 +1781,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
- if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
+ if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
+ i_size_read(inode), ctx->pos)) {
/* On error, skip the f_pos to the end. */
ctx->pos = i_size_read(inode);
break;
@@ -1867,7 +1875,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (ctx->pos < i_size_read(inode)
&& offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
- if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
+ sb->s_blocksize, offset)) {
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
@@ -3339,7 +3348,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct super_block *sb = dir->i_sb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_dir_entry *de, *last_de = NULL;
- char *de_buf, *limit;
+ char *first_de, *de_buf, *limit;
unsigned long offset = 0;
unsigned int rec_len, new_rec_len, free_space;
@@ -3352,14 +3361,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
else
free_space = dir->i_sb->s_blocksize - i_size_read(dir);
- de_buf = di->id2.i_data.id_data;
+ first_de = di->id2.i_data.id_data;
+ de_buf = first_de;
limit = de_buf + i_size_read(dir);
rec_len = OCFS2_DIR_REC_LEN(namelen);
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
- if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de,
+ i_size_read(dir), offset)) {
ret = -ENOENT;
goto out;
}
@@ -3441,7 +3452,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
/* move to next block */
de = (struct ocfs2_dir_entry *) bh->b_data;
}
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize,
+ offset)) {
status = -ENOENT;
goto bail;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index cb40cafbc062..da78a04d6f0b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops {
*/
#define LOCK_TYPE_USES_LVB 0x2
-static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.get_osb = ocfs2_get_inode_osb,
.check_downconvert = ocfs2_check_meta_downconvert,
.set_lvb = ocfs2_set_meta_lvb,
@@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_super_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_super_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH,
};
-static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
.downconvert_worker = ocfs2_dentry_convert_worker,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.get_osb = ocfs2_get_file_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
.set_lvb = ocfs2_set_qinfo_lvb,
.get_osb = ocfs2_get_qinfo_osb,
.flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
.check_downconvert = ocfs2_check_refcount_downconvert,
.downconvert_worker = ocfs2_refcount_convert_worker,
.flags = 0,
@@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
- struct ocfs2_lock_res_ops *ops,
+ const struct ocfs2_lock_res_ops *ops,
void *priv)
{
res->l_type = type;
@@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
unsigned int generation,
struct inode *inode)
{
- struct ocfs2_lock_res_ops *ops;
+ const struct ocfs2_lock_res_ops *ops;
switch(type) {
case OCFS2_LOCK_TYPE_RW:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4d1ea8703fcd..59c92353151a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2189,8 +2189,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
* @osb: ocfs2 file system
* @ret_orphan_dir: Orphan dir inode - returned locked!
* @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @name: Buffer to store the name of the orphan.
* @lookup: dir lookup result, to be passed back into functions like
* ocfs2_orphan_add
+ * @dio: Flag indicating whether direct I/O is being used.
*
* Returns zero on success and the ret_orphan_dir, name and lookup
* fields will be populated.
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8fe826143d7b..51c52768132d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -154,7 +154,7 @@ struct ocfs2_lock_stats {
struct ocfs2_lock_res {
void *l_priv;
- struct ocfs2_lock_res_ops *l_ops;
+ const struct ocfs2_lock_res_ops *l_ops;
struct list_head l_blocked_list;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index c973c03f6fd8..10157d9d7a9c 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations o2cb_stack_ops = {
+static const struct ocfs2_stack_operations o2cb_stack_ops = {
.connect = o2cb_cluster_connect,
.disconnect = o2cb_cluster_disconnect,
.this_node = o2cb_cluster_this_node,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index c11406cd87a8..77edcd70f72c 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
+static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.connect = user_cluster_connect,
.disconnect = user_cluster_disconnect,
.this_node = user_cluster_this_node,
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 3636847fae19..02ab072c528a 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -223,7 +223,7 @@ struct ocfs2_stack_operations {
*/
struct ocfs2_stack_plugin {
char *sp_name;
- struct ocfs2_stack_operations *sp_ops;
+ const struct ocfs2_stack_operations *sp_ops;
struct module *sp_owner;
/* These are managed by the stackglue code. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..35c0cc2a51af 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
return i_ret + b_ret;
}
-static int ocfs2_xattr_find_entry(int name_index,
+static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
const char *name,
struct ocfs2_xattr_search *xs)
{
struct ocfs2_xattr_entry *entry;
size_t name_len;
- int i, cmp = 1;
+ int i, name_offset, cmp = 1;
if (name == NULL)
return -EINVAL;
@@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index,
name_len = strlen(name);
entry = xs->here;
for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ if ((void *)entry >= xs->end) {
+ ocfs2_error(inode->i_sb, "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
cmp = name_index - ocfs2_xattr_get_type(entry);
if (!cmp)
cmp = name_len - entry->xe_name_len;
- if (!cmp)
- cmp = memcmp(name, (xs->base +
- le16_to_cpu(entry->xe_name_offset)),
- name_len);
+ if (!cmp) {
+ name_offset = le16_to_cpu(entry->xe_name_offset);
+ if ((xs->base + name_offset + name_len) > xs->end) {
+ ocfs2_error(inode->i_sb,
+ "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
+ cmp = memcmp(name, (xs->base + name_offset), name_len);
+ }
if (cmp == 0)
break;
entry += 1;
@@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode,
xs->base = (void *)xs->header;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret)
return ret;
size = le64_to_cpu(xs->here->xe_value_size);
@@ -2698,7 +2707,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
/* Find the named attribute. */
if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret && ret != -ENODATA)
return ret;
xs->not_found = ret;
@@ -2833,7 +2842,7 @@ static int ocfs2_xattr_block_find(struct inode *inode,
xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
} else
ret = ocfs2_xattr_index_block_find(inode, blk_bh,
name_index,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a71ac5379584..a8a8576d8592 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -13,6 +13,7 @@
#include <linux/binfmts.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
+#include <linux/mm.h>
struct ctl_table_header;
struct mempolicy;
@@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr);
/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13
+/**
+ * folio_precise_page_mapcount() - Number of mappings of this folio page.
+ * @folio: The folio.
+ * @page: The page.
+ *
+ * The number of present user page table entries that reference this page
+ * as tracked via the RMAP: either referenced directly (PTE) or as part of
+ * a larger area that covers this page (e.g., PMD).
+ *
+ * Use this function only for the calculation of existing statistics
+ * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount).
+ *
+ * Do not add new users.
+ *
+ * Returns: The number of mappings of this folio page. 0 for
+ * folios that are not mapped to user space or are not tracked via the RMAP
+ * (e.g., shared zeropage).
+ */
+static inline int folio_precise_page_mapcount(struct folio *folio,
+ struct page *page)
+{
+ int mapcount = atomic_read(&page->_mapcount) + 1;
+
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
+ if (folio_test_large(folio))
+ mapcount += folio_entire_mapcount(folio);
+
+ return mapcount;
+}
+
/*
* array.c
*/
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2fb64bdb64eb..b7a5c84b5819 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void)
#endif
}
-/* /proc/kpagecount - an array exposing page counts
+/* /proc/kpagecount - an array exposing page mapcounts
*
* Each entry is a u64 representing the corresponding
- * physical page count.
+ * physical page mapcount.
*/
static ssize_t kpagecount_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
- struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
- u64 pcount;
pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK)
@@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
while (count > 0) {
+ struct page *page;
+ u64 mapcount = 0;
+
/*
* TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized.
*/
- ppage = pfn_to_online_page(pfn);
-
- if (!ppage)
- pcount = 0;
- else
- pcount = page_mapcount(ppage);
+ page = pfn_to_online_page(pfn);
+ if (page)
+ mapcount = folio_precise_page_mapcount(page_folio(page),
+ page);
- if (put_user(pcount, out)) {
+ if (put_user(mapcount, out)) {
ret = -EFAULT;
break;
}
@@ -148,19 +147,16 @@ u64 stable_page_flags(const struct page *page)
u |= 1 << KPF_COMPOUND_TAIL;
if (folio_test_hugetlb(folio))
u |= 1 << KPF_HUGE;
- /*
- * We need to check PageLRU/PageAnon
- * to make sure a given page is a thp, not a non-huge compound page.
- */
- else if (folio_test_large(folio)) {
- if ((k & (1 << PG_lru)) || is_anon)
- u |= 1 << KPF_THP;
- else if (is_huge_zero_folio(folio)) {
- u |= 1 << KPF_ZERO_PAGE;
- u |= 1 << KPF_THP;
- }
- } else if (is_zero_pfn(page_to_pfn(page)))
+ else if (folio_test_large(folio) &&
+ folio_test_large_rmappable(folio)) {
+ /* Note: we indicate any THPs here, not just PMD-sized ones */
+ u |= 1 << KPF_THP;
+ } else if (is_huge_zero_folio(folio)) {
u |= 1 << KPF_ZERO_PAGE;
+ u |= 1 << KPF_THP;
+ } else if (is_zero_folio(folio)) {
+ u |= 1 << KPF_ZERO_PAGE;
+ }
/*
* Caveats on high order pages: PG_buddy and PG_slab will only be set
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 71e5039d940d..775a2e8d600c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -22,6 +22,7 @@
#include <linux/pkeys.h>
#include <linux/minmax.h>
#include <linux/overflow.h>
+#include <linux/buildid.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -239,6 +240,67 @@ static int do_maps_open(struct inode *inode, struct file *file,
sizeof(struct proc_maps_private));
}
+static void get_vma_name(struct vm_area_struct *vma,
+ const struct path **path,
+ const char **name,
+ const char **name_fmt)
+{
+ struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL;
+
+ *name = NULL;
+ *path = NULL;
+ *name_fmt = NULL;
+
+ /*
+ * Print the dentry name for named mappings, and a
+ * special [heap] marker for the heap:
+ */
+ if (vma->vm_file) {
+ /*
+ * If user named this anon shared memory via
+ * prctl(PR_SET_VMA ..., use the provided name.
+ */
+ if (anon_name) {
+ *name_fmt = "[anon_shmem:%s]";
+ *name = anon_name->name;
+ } else {
+ *path = file_user_path(vma->vm_file);
+ }
+ return;
+ }
+
+ if (vma->vm_ops && vma->vm_ops->name) {
+ *name = vma->vm_ops->name(vma);
+ if (*name)
+ return;
+ }
+
+ *name = arch_vma_name(vma);
+ if (*name)
+ return;
+
+ if (!vma->vm_mm) {
+ *name = "[vdso]";
+ return;
+ }
+
+ if (vma_is_initial_heap(vma)) {
+ *name = "[heap]";
+ return;
+ }
+
+ if (vma_is_initial_stack(vma)) {
+ *name = "[stack]";
+ return;
+ }
+
+ if (anon_name) {
+ *name_fmt = "[anon:%s]";
+ *name = anon_name->name;
+ return;
+ }
+}
+
static void show_vma_header_prefix(struct seq_file *m,
unsigned long start, unsigned long end,
vm_flags_t flags, unsigned long long pgoff,
@@ -262,17 +324,15 @@ static void show_vma_header_prefix(struct seq_file *m,
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
- struct anon_vma_name *anon_name = NULL;
- struct mm_struct *mm = vma->vm_mm;
- struct file *file = vma->vm_file;
+ const struct path *path;
+ const char *name_fmt, *name;
vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
unsigned long start, end;
dev_t dev = 0;
- const char *name = NULL;
- if (file) {
+ if (vma->vm_file) {
const struct inode *inode = file_user_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
@@ -283,57 +343,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
start = vma->vm_start;
end = vma->vm_end;
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
- if (mm)
- anon_name = anon_vma_name(vma);
- /*
- * Print the dentry name for named mappings, and a
- * special [heap] marker for the heap:
- */
- if (file) {
+ get_vma_name(vma, &path, &name, &name_fmt);
+ if (path) {
seq_pad(m, ' ');
- /*
- * If user named this anon shared memory via
- * prctl(PR_SET_VMA ..., use the provided name.
- */
- if (anon_name)
- seq_printf(m, "[anon_shmem:%s]", anon_name->name);
- else
- seq_path(m, file_user_path(file), "\n");
- goto done;
- }
-
- if (vma->vm_ops && vma->vm_ops->name) {
- name = vma->vm_ops->name(vma);
- if (name)
- goto done;
- }
-
- name = arch_vma_name(vma);
- if (!name) {
- if (!mm) {
- name = "[vdso]";
- goto done;
- }
-
- if (vma_is_initial_heap(vma)) {
- name = "[heap]";
- goto done;
- }
-
- if (vma_is_initial_stack(vma)) {
- name = "[stack]";
- goto done;
- }
-
- if (anon_name) {
- seq_pad(m, ' ');
- seq_printf(m, "[anon:%s]", anon_name->name);
- }
- }
-
-done:
- if (name) {
+ seq_path(m, path, "\n");
+ } else if (name_fmt) {
+ seq_pad(m, ' ');
+ seq_printf(m, name_fmt, name);
+ } else if (name) {
seq_pad(m, ' ');
seq_puts(m, name);
}
@@ -358,11 +376,268 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
+#define PROCMAP_QUERY_VMA_FLAGS ( \
+ PROCMAP_QUERY_VMA_READABLE | \
+ PROCMAP_QUERY_VMA_WRITABLE | \
+ PROCMAP_QUERY_VMA_EXECUTABLE | \
+ PROCMAP_QUERY_VMA_SHARED \
+)
+
+#define PROCMAP_QUERY_VALID_FLAGS_MASK ( \
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \
+ PROCMAP_QUERY_FILE_BACKED_VMA | \
+ PROCMAP_QUERY_VMA_FLAGS \
+)
+
+static int query_vma_setup(struct mm_struct *mm)
+{
+ return mmap_read_lock_killable(mm);
+}
+
+static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ mmap_read_unlock(mm);
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+{
+ return find_vma(mm, addr);
+}
+
+static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+ unsigned long addr, u32 flags)
+{
+ struct vm_area_struct *vma;
+
+next_vma:
+ vma = query_vma_find_by_addr(mm, addr);
+ if (!vma)
+ goto no_vma;
+
+ /* user requested only file-backed VMA, keep iterating */
+ if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file)
+ goto skip_vma;
+
+ /* VMA permissions should satisfy query flags */
+ if (flags & PROCMAP_QUERY_VMA_FLAGS) {
+ u32 perm = 0;
+
+ if (flags & PROCMAP_QUERY_VMA_READABLE)
+ perm |= VM_READ;
+ if (flags & PROCMAP_QUERY_VMA_WRITABLE)
+ perm |= VM_WRITE;
+ if (flags & PROCMAP_QUERY_VMA_EXECUTABLE)
+ perm |= VM_EXEC;
+ if (flags & PROCMAP_QUERY_VMA_SHARED)
+ perm |= VM_MAYSHARE;
+
+ if ((vma->vm_flags & perm) != perm)
+ goto skip_vma;
+ }
+
+ /* found covering VMA or user is OK with the matching next VMA */
+ if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr)
+ return vma;
+
+skip_vma:
+ /*
+ * If the user needs closest matching VMA, keep iterating.
+ */
+ addr = vma->vm_end;
+ if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA)
+ goto next_vma;
+
+no_vma:
+ return ERR_PTR(-ENOENT);
+}
+
+static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+{
+ struct procmap_query karg;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ const char *name = NULL;
+ char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
+ __u64 usize;
+ int err;
+
+ if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize)))
+ return -EFAULT;
+ /* argument struct can never be that large, reject abuse */
+ if (usize > PAGE_SIZE)
+ return -E2BIG;
+ /* argument struct should have at least query_flags and query_addr fields */
+ if (usize < offsetofend(struct procmap_query, query_addr))
+ return -EINVAL;
+ err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize);
+ if (err)
+ return err;
+
+ /* reject unknown flags */
+ if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK)
+ return -EINVAL;
+ /* either both buffer address and size are set, or both should be zero */
+ if (!!karg.vma_name_size != !!karg.vma_name_addr)
+ return -EINVAL;
+ if (!!karg.build_id_size != !!karg.build_id_addr)
+ return -EINVAL;
+
+ mm = priv->mm;
+ if (!mm || !mmget_not_zero(mm))
+ return -ESRCH;
+
+ err = query_vma_setup(mm);
+ if (err) {
+ mmput(mm);
+ return err;
+ }
+
+ vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ vma = NULL;
+ goto out;
+ }
+
+ karg.vma_start = vma->vm_start;
+ karg.vma_end = vma->vm_end;
+
+ karg.vma_flags = 0;
+ if (vma->vm_flags & VM_READ)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE;
+ if (vma->vm_flags & VM_WRITE)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE;
+ if (vma->vm_flags & VM_EXEC)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE;
+ if (vma->vm_flags & VM_MAYSHARE)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED;
+
+ karg.vma_page_size = vma_kernel_pagesize(vma);
+
+ if (vma->vm_file) {
+ const struct inode *inode = file_user_inode(vma->vm_file);
+
+ karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT;
+ karg.dev_major = MAJOR(inode->i_sb->s_dev);
+ karg.dev_minor = MINOR(inode->i_sb->s_dev);
+ karg.inode = inode->i_ino;
+ } else {
+ karg.vma_offset = 0;
+ karg.dev_major = 0;
+ karg.dev_minor = 0;
+ karg.inode = 0;
+ }
+
+ if (karg.build_id_size) {
+ __u32 build_id_sz;
+
+ err = build_id_parse(vma, build_id_buf, &build_id_sz);
+ if (err) {
+ karg.build_id_size = 0;
+ } else {
+ if (karg.build_id_size < build_id_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.build_id_size = build_id_sz;
+ }
+ }
+
+ if (karg.vma_name_size) {
+ size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size);
+ const struct path *path;
+ const char *name_fmt;
+ size_t name_sz = 0;
+
+ get_vma_name(vma, &path, &name, &name_fmt);
+
+ if (path || name_fmt || name) {
+ name_buf = kmalloc(name_buf_sz, GFP_KERNEL);
+ if (!name_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ if (path) {
+ name = d_path(path, name_buf, name_buf_sz);
+ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+ goto out;
+ }
+ name_sz = name_buf + name_buf_sz - name;
+ } else if (name || name_fmt) {
+ name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name);
+ name = name_buf;
+ }
+ if (name_sz > name_buf_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.vma_name_size = name_sz;
+ }
+
+ /* unlock vma or mmap_lock, and put mm_struct before copying data to user */
+ query_vma_teardown(mm, vma);
+ mmput(mm);
+
+ if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
+ name, karg.vma_name_size)) {
+ kfree(name_buf);
+ return -EFAULT;
+ }
+ kfree(name_buf);
+
+ if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr),
+ build_id_buf, karg.build_id_size))
+ return -EFAULT;
+
+ if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize)))
+ return -EFAULT;
+
+ return 0;
+
+out:
+ query_vma_teardown(mm, vma);
+ mmput(mm);
+ kfree(name_buf);
+ return err;
+}
+
+static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct seq_file *seq = file->private_data;
+ struct proc_maps_private *priv = seq->private;
+
+ switch (cmd) {
+ case PROCMAP_QUERY:
+ return do_procmap_query(priv, (void __user *)arg);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = proc_map_release,
+ .unlocked_ioctl = procfs_procmap_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
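
For context, a minimal userspace sketch of driving the new ioctl. This is an illustration only, assuming the struct procmap_query and PROCMAP_QUERY definitions this series adds to include/uapi/linux/fs.h, with error handling pared down:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* struct procmap_query, PROCMAP_QUERY */

	/* Print the VMA containing addr in the current process, if any. */
	static int print_vma(unsigned long addr)
	{
		struct procmap_query q;
		char vma_name[256];
		int fd, err;

		fd = open("/proc/self/maps", O_RDONLY);
		if (fd < 0)
			return -1;

		memset(&q, 0, sizeof(q));
		q.size = sizeof(q);	/* size-checked against offsetofend() above */
		q.query_flags = 0;	/* VMA must contain query_addr */
		q.query_addr = addr;
		q.vma_name_addr = (uint64_t)(uintptr_t)vma_name;
		q.vma_name_size = sizeof(vma_name);

		err = ioctl(fd, PROCMAP_QUERY, &q);
		if (!err)
			printf("%llx-%llx %s\n",
			       (unsigned long long)q.vma_start,
			       (unsigned long long)q.vma_end,
			       q.vma_name_size ? vma_name : "[anon]");
		close(fd);
		return err;
	}
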
/*
@@ -442,7 +717,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked,
- bool migration)
+ bool present)
{
struct folio *folio = page_folio(page);
int i, nr = compound ? compound_nr(page) : 1;
@@ -471,24 +746,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* Then accumulate quantities that may depend on sharing, or that may
* differ page-by-page.
*
- * refcount == 1 guarantees the page is mapped exactly once.
- * If any subpage of the compound page mapped with PTE it would elevate
- * the refcount.
+ * refcount == 1 for present entries guarantees that the folio is mapped
+ * exactly once. For large folios this implies that exactly one
+ * PTE/PMD/... maps (a part of) this folio.
*
- * The page_mapcount() is called to get a snapshot of the mapcount.
- * Without holding the page lock this snapshot can be slightly wrong as
- * we cannot always read the mapcount atomically. It is not safe to
- * call page_mapcount() even with PTL held if the page is not mapped,
- * especially for migration entries. Treat regular migration entries
- * as mapcount == 1.
+ * Treat all non-present entries (where relying on the mapcount and
+ * refcount doesn't make sense) as "maybe shared, but not sure how
+ * often". We treat device private entries as being fake-present.
+ *
+ * Note that it would not be safe to read the mapcount especially for
+ * pages referenced by migration entries, even with the PTL held.
*/
- if ((folio_ref_count(folio) == 1) || migration) {
+ if (folio_ref_count(folio) == 1 || !present) {
smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT,
- dirty, locked, true);
+ dirty, locked, present);
return;
}
+ /*
+ * We obtain a snapshot of the mapcount. Without holding the folio lock
+ * this snapshot can be slightly wrong as we cannot always read the
+ * mapcount atomically.
+ */
for (i = 0; i < nr; i++, page++) {
- int mapcount = page_mapcount(page);
+ int mapcount = folio_precise_page_mapcount(folio, page);
unsigned long pss = PAGE_SIZE << PSS_SHIFT;
if (mapcount >= 2)
pss /= mapcount;
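
The <<PSS_SHIFT fixed-point scheme above, reduced to a toy function (PSS_SHIFT is 12 in the kernel; the names and constants here are illustrative only):

	#define TOY_PSS_SHIFT	12
	#define TOY_PAGE_SIZE	4096ULL

	/* Each mapper is charged PAGE_SIZE / mapcount; keeping the value
	 * shifted left by PSS_SHIFT preserves the fractional part across
	 * the per-page summation. */
	static unsigned long long toy_pss(int mapcount)
	{
		unsigned long long pss = TOY_PAGE_SIZE << TOY_PSS_SHIFT;

		return mapcount >= 2 ? pss / mapcount : pss;
	}
	/* e.g. toy_pss(3) >> TOY_PSS_SHIFT == 1365 bytes charged per mapper */
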
@@ -531,13 +811,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false, young = false, dirty = false;
+ bool present = false, young = false, dirty = false;
pte_t ptent = ptep_get(pte);
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
young = pte_young(ptent);
dirty = pte_dirty(ptent);
+ present = true;
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
@@ -555,8 +836,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
} else if (is_pfn_swap_entry(swpent)) {
- if (is_migration_entry(swpent))
- migration = true;
+ if (is_device_private_entry(swpent))
+ present = true;
page = pfn_swap_entry_to_page(swpent);
}
} else {
@@ -567,7 +848,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, young, dirty, locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, present);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -578,18 +859,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool present = false;
struct folio *folio;
- bool migration = false;
if (pmd_present(*pmd)) {
page = vm_normal_page_pmd(vma, addr, *pmd);
+ present = true;
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
- if (is_migration_entry(entry)) {
- migration = true;
+ if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
- }
}
if (IS_ERR_OR_NULL(page))
return;
@@ -604,7 +884,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
- locked, migration);
+ locked, present);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -733,19 +1013,23 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
- pte_t ptent = huge_ptep_get(pte);
+ pte_t ptent = huge_ptep_get(walk->mm, addr, pte);
struct folio *folio = NULL;
+ bool present = false;
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
+ present = true;
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
folio = pfn_swap_entry_folio(swpent);
}
+
if (folio) {
- if (folio_likely_mapped_shared(folio) ||
+ /* We treat non-present entries as "maybe shared". */
+ if (!present || folio_likely_mapped_shared(folio) ||
hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
@@ -1091,7 +1375,7 @@ struct clear_refs_private {
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- struct page *page;
+ struct folio *folio;
if (!pte_write(pte))
return false;
@@ -1099,10 +1383,10 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return false;
if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
return false;
- page = vm_normal_page(vma, addr, pte);
- if (!page)
+ folio = vm_normal_folio(vma, addr, pte);
+ if (!folio)
return false;
- return page_maybe_dma_pinned(page);
+ return folio_maybe_dma_pinned(folio);
}
static inline void clear_soft_dirty(struct vm_area_struct *vma,
@@ -1418,7 +1702,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
{
u64 frame = 0, flags = 0;
struct page *page = NULL;
- bool migration = false;
+ struct folio *folio;
if (pte_present(pte)) {
if (pm->show_pfn)
@@ -1450,17 +1734,20 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
(offset << MAX_SWAPFILES_SHIFT);
}
flags |= PM_SWAP;
- migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
if (pte_marker_entry_uffd_wp(entry))
flags |= PM_UFFD_WP;
}
- if (page && !PageAnon(page))
- flags |= PM_FILE;
- if (page && !migration && page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ if ((flags & PM_PRESENT) &&
+ folio_precise_page_mapcount(folio, page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+ }
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
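
The PM_MMAP_EXCLUSIVE bit recomputed above is consumed from userspace via /proc/<pid>/pagemap; a minimal reader sketch, with bit positions taken from Documentation/admin-guide/mm/pagemap.rst (bit 63 present, bit 56 exclusive):

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/types.h>
	#include <unistd.h>

	/* Returns 1 if the page at vaddr is mapped exclusively, 0 if shared,
	 * -1 if not present or on error. One 64-bit entry per page. */
	static int page_is_exclusive(unsigned long vaddr)
	{
		long psize = sysconf(_SC_PAGESIZE);
		uint64_t ent;
		int ret = -1;
		int fd = open("/proc/self/pagemap", O_RDONLY);

		if (fd < 0)
			return -1;
		if (pread(fd, &ent, sizeof(ent),
			  (off_t)(vaddr / psize) * sizeof(ent)) == sizeof(ent) &&
		    (ent >> 63) & 1)			/* PM_PRESENT */
			ret = (int)((ent >> 56) & 1);	/* PM_MMAP_EXCLUSIVE */
		close(fd);
		return ret;
	}
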
@@ -1476,13 +1763,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pte_t *pte, *orig_pte;
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- bool migration = false;
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
+ unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ struct folio *folio = NULL;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1496,8 +1784,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
- frame = pmd_pfn(pmd) +
- ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ frame = pmd_pfn(pmd) + idx;
}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
else if (is_swap_pmd(pmd)) {
@@ -1506,11 +1793,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pm->show_pfn) {
if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry);
+ offset = swp_offset_pfn(entry) + idx;
else
- offset = swp_offset(entry);
- offset = offset +
- ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ offset = swp_offset(entry) + idx;
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
}
@@ -1520,17 +1805,25 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
- migration = is_migration_entry(entry);
page = pfn_swap_entry_to_page(entry);
}
#endif
- if (page && !migration && page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ }
+
+ for (; addr != end; addr += PAGE_SIZE, idx++) {
+ unsigned long cur_flags = flags;
+ pagemap_entry_t pme;
- for (; addr != end; addr += PAGE_SIZE) {
- pagemap_entry_t pme = make_pme(frame, flags);
+ if (folio && (flags & PM_PRESENT) &&
+ folio_precise_page_mapcount(folio, page + idx) == 1)
+ cur_flags |= PM_MMAP_EXCLUSIVE;
+ pme = make_pme(frame, cur_flags);
err = add_to_pagemap(&pme, pm);
if (err)
break;
@@ -1585,7 +1878,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, addr, ptep);
if (pte_present(pte)) {
struct folio *folio = page_folio(pte_page(pte));
@@ -2274,7 +2567,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
/* Go the short route when not write-protecting pages. */
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2286,7 +2579,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
i_mmap_lock_write(vma->vm_file->f_mapping);
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2566,7 +2859,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages)
{
struct folio *folio = page_folio(page);
- int count = page_mapcount(page);
+ int count = folio_precise_page_mapcount(folio, page);
md->pages += nr_pages;
if (pte_dirty || folio_test_dirty(folio))
@@ -2682,7 +2975,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
- pte_t huge_pte = huge_ptep_get(pte);
+ pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte);
struct numa_maps *md;
struct page *page;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 6397fdefd876..c92937bed133 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1359,7 +1359,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
target_tcon = tlink_tcon(smb_file_target->tlink);
if (src_tcon->ses != target_tcon->ses) {
- cifs_dbg(VFS, "source and target of copy not on same server\n");
+ cifs_dbg(FYI, "source and target of copy not on same server\n");
goto out;
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index a865941724c0..8e86fec7dcd2 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -290,7 +290,7 @@ struct smb_version_operations {
int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *,
bool);
void (*add_credits)(struct TCP_Server_Info *server,
- const struct cifs_credits *credits,
+ struct cifs_credits *credits,
const int optype);
void (*set_credits)(struct TCP_Server_Info *, const int);
int * (*get_credits_field)(struct TCP_Server_Info *, const int);
@@ -550,8 +550,8 @@ struct smb_version_operations {
size_t *, struct cifs_credits *);
/* adjust previously taken mtu credits to request size */
int (*adjust_credits)(struct TCP_Server_Info *server,
- struct cifs_credits *credits,
- const unsigned int payload_size);
+ struct cifs_io_subrequest *subreq,
+ unsigned int /*enum smb3_rw_credits_trace*/ trace);
/* check if we need to issue closedir */
bool (*dir_needs_close)(struct cifsFileInfo *);
long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
@@ -848,6 +848,9 @@ static inline void cifs_server_unlock(struct TCP_Server_Info *server)
struct cifs_credits {
unsigned int value;
unsigned int instance;
+ unsigned int in_flight_check;
+ unsigned int rreq_debug_id;
+ unsigned int rreq_debug_index;
};
static inline unsigned int
@@ -873,7 +876,7 @@ has_credits(struct TCP_Server_Info *server, int *credits, int num_credits)
}
static inline void
-add_credits(struct TCP_Server_Info *server, const struct cifs_credits *credits,
+add_credits(struct TCP_Server_Info *server, struct cifs_credits *credits,
const int optype)
{
server->ops->add_credits(server, credits, optype);
@@ -897,11 +900,11 @@ set_credits(struct TCP_Server_Info *server, const int val)
}
static inline int
-adjust_credits(struct TCP_Server_Info *server, struct cifs_credits *credits,
- const unsigned int payload_size)
+adjust_credits(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq,
+ unsigned int /* enum smb3_rw_credits_trace */ trace)
{
return server->ops->adjust_credits ?
- server->ops->adjust_credits(server, credits, payload_size) : 0;
+ server->ops->adjust_credits(server, subreq, trace) : 0;
}
static inline __le64
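
The wrapper follows the usual optional-method dispatch pattern: a missing per-dialect op means "nothing to adjust". In isolation, with toy types standing in for the cifs ones:

	/* Dispatch through the per-dialect ops table, treating an absent
	 * method as a successful no-op. */
	struct toy_server_ops {
		int (*adjust_credits)(void *server, void *subreq,
				      unsigned int trace);
	};

	static inline int toy_adjust_credits(const struct toy_server_ops *ops,
					     void *server, void *subreq,
					     unsigned int trace)
	{
		return ops->adjust_credits ?
		       ops->adjust_credits(server, subreq, trace) : 0;
	}
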
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 1374635e89fa..b2405dd4d4d4 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -80,6 +80,16 @@ retry:
return netfs_prepare_write_failed(subreq);
}
+ wdata->credits.rreq_debug_id = subreq->rreq->debug_id;
+ wdata->credits.rreq_debug_index = subreq->debug_index;
+ wdata->credits.in_flight_check = 1;
+ trace_smb3_rw_credits(wdata->rreq->debug_id,
+ wdata->subreq.debug_index,
+ wdata->credits.value,
+ server->credits, server->in_flight,
+ wdata->credits.value,
+ cifs_trace_rw_credits_write_prepare);
+
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->smbd_conn)
subreq->max_nr_segs = server->smbd_conn->max_frmr_depth;
@@ -101,7 +111,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq)
goto fail;
}
- rc = adjust_credits(wdata->server, &wdata->credits, wdata->subreq.len);
+ rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust);
if (rc)
goto fail;
@@ -123,6 +133,11 @@ fail:
goto out;
}
+static void cifs_netfs_invalidate_cache(struct netfs_io_request *wreq)
+{
+ cifs_invalidate_cache(wreq->inode, 0);
+}
+
/*
* Split the read up according to how many credits we can get for each piece.
* It's okay to sleep here if we need to wait for more credit to become
@@ -158,7 +173,18 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
return false;
}
+ rdata->credits.in_flight_check = 1;
+ rdata->credits.rreq_debug_id = rreq->debug_id;
+ rdata->credits.rreq_debug_index = subreq->debug_index;
+
+ trace_smb3_rw_credits(rdata->rreq->debug_id,
+ rdata->subreq.debug_index,
+ rdata->credits.value,
+ server->credits, server->in_flight, 0,
+ cifs_trace_rw_credits_read_submit);
+
subreq->len = min_t(size_t, subreq->len, rsize);
+
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->smbd_conn)
subreq->max_nr_segs = server->smbd_conn->max_frmr_depth;
@@ -289,6 +315,15 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq)
#endif
}
+ if (rdata->credits.value != 0)
+ trace_smb3_rw_credits(rdata->rreq->debug_id,
+ rdata->subreq.debug_index,
+ rdata->credits.value,
+ rdata->server ? rdata->server->credits : 0,
+ rdata->server ? rdata->server->in_flight : 0,
+ -rdata->credits.value,
+ cifs_trace_rw_credits_free_subreq);
+
add_credits_and_wake_if(rdata->server, &rdata->credits, 0);
if (rdata->have_xid)
free_xid(rdata->xid);
@@ -307,6 +342,7 @@ const struct netfs_request_ops cifs_req_ops = {
.begin_writeback = cifs_begin_writeback,
.prepare_write = cifs_prepare_write,
.issue_write = cifs_issue_write,
+ .invalidate_cache = cifs_netfs_invalidate_cache,
};
/*
@@ -2358,13 +2394,18 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t
bool was_async)
{
struct netfs_io_request *wreq = wdata->rreq;
- loff_t new_server_eof;
+ struct netfs_inode *ictx = netfs_inode(wreq->inode);
+ loff_t wrend;
if (result > 0) {
- new_server_eof = wdata->subreq.start + wdata->subreq.transferred + result;
+ wrend = wdata->subreq.start + wdata->subreq.transferred + result;
- if (new_server_eof > netfs_inode(wreq->inode)->remote_i_size)
- netfs_resize_file(netfs_inode(wreq->inode), new_server_eof, true);
+ if (wrend > ictx->zero_point &&
+ (wdata->rreq->origin == NETFS_UNBUFFERED_WRITE ||
+ wdata->rreq->origin == NETFS_DIO_WRITE))
+ ictx->zero_point = wrend;
+ if (wrend > ictx->remote_i_size)
+ netfs_resize_file(ictx, wrend, true);
}
netfs_write_subrequest_terminated(&wdata->subreq, result, was_async);
@@ -2877,6 +2918,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
rc = netfs_start_io_direct(inode);
if (rc < 0)
goto out;
+ rc = -EACCES;
down_read(&cinode->lock_sem);
if (!cifs_find_lock_conflict(
cfile, iocb->ki_pos, iov_iter_count(to),
@@ -2889,6 +2931,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
rc = netfs_start_io_read(inode);
if (rc < 0)
goto out;
+ rc = -EACCES;
down_read(&cinode->lock_sem);
if (!cifs_find_lock_conflict(
cfile, iocb->ki_pos, iov_iter_count(to),
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 212ec6f66ec6..e1f2feb56f45 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -108,7 +108,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
static void
cifs_add_credits(struct TCP_Server_Info *server,
- const struct cifs_credits *credits, const int optype)
+ struct cifs_credits *credits, const int optype)
{
spin_lock(&server->req_lock);
server->credits += credits->value;
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index c8e536540895..7fe59235f090 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -66,7 +66,7 @@ change_conf(struct TCP_Server_Info *server)
static void
smb2_add_credits(struct TCP_Server_Info *server,
- const struct cifs_credits *credits, const int optype)
+ struct cifs_credits *credits, const int optype)
{
int *val, rc = -1;
int scredits, in_flight;
@@ -94,7 +94,21 @@ smb2_add_credits(struct TCP_Server_Info *server,
server->conn_id, server->hostname, *val,
add, server->in_flight);
}
- WARN_ON_ONCE(server->in_flight == 0);
+ if (credits->in_flight_check > 1) {
+ pr_warn_once("rreq R=%08x[%x] Credits not in flight\n",
+ credits->rreq_debug_id, credits->rreq_debug_index);
+ } else {
+ credits->in_flight_check = 2;
+ }
+ if (WARN_ON_ONCE(server->in_flight == 0)) {
+ pr_warn_once("rreq R=%08x[%x] Zero in_flight\n",
+ credits->rreq_debug_id, credits->rreq_debug_index);
+ trace_smb3_rw_credits(credits->rreq_debug_id,
+ credits->rreq_debug_index,
+ credits->value,
+ server->credits, server->in_flight, 0,
+ cifs_trace_rw_credits_zero_in_flight);
+ }
server->in_flight--;
if (server->in_flight == 0 &&
((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
@@ -283,16 +297,23 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
static int
smb2_adjust_credits(struct TCP_Server_Info *server,
- struct cifs_credits *credits,
- const unsigned int payload_size)
+ struct cifs_io_subrequest *subreq,
+ unsigned int /*enum smb3_rw_credits_trace*/ trace)
{
- int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
+ struct cifs_credits *credits = &subreq->credits;
+ int new_val = DIV_ROUND_UP(subreq->subreq.len, SMB2_MAX_BUFFER_SIZE);
int scredits, in_flight;
if (!credits->value || credits->value == new_val)
return 0;
if (credits->value < new_val) {
+ trace_smb3_rw_credits(subreq->rreq->debug_id,
+ subreq->subreq.debug_index,
+ credits->value,
+ server->credits, server->in_flight,
+ new_val - credits->value,
+ cifs_trace_rw_credits_no_adjust_up);
trace_smb3_too_many_credits(server->CurrentMid,
server->conn_id, server->hostname, 0, credits->value - new_val, 0);
cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
@@ -308,6 +329,12 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
+ trace_smb3_rw_credits(subreq->rreq->debug_id,
+ subreq->subreq.debug_index,
+ credits->value,
+ server->credits, server->in_flight,
+ new_val - credits->value,
+ cifs_trace_rw_credits_old_session);
trace_smb3_reconnect_detected(server->CurrentMid,
server->conn_id, server->hostname, scredits,
credits->value - new_val, in_flight);
@@ -316,6 +343,11 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
return -EAGAIN;
}
+ trace_smb3_rw_credits(subreq->rreq->debug_id,
+ subreq->subreq.debug_index,
+ credits->value,
+ server->credits, server->in_flight,
+ new_val - credits->value, trace);
server->credits += credits->value - new_val;
scredits = server->credits;
in_flight = server->in_flight;
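
The credit arithmetic that smb2_adjust_credits() now traces is one credit per 64KiB of payload; the surplus (credits->value - new_val) is handed back to the server pool. A toy sketch, assuming SMB2_MAX_BUFFER_SIZE of 65536 as in the SMB2 code:

	#define TOY_SMB2_MAX_BUFFER_SIZE	65536
	#define TOY_DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

	/* A 200000-byte subrequest needs
	 * TOY_DIV_ROUND_UP(200000, 65536) == 4 credits. */
	static unsigned int toy_credits_needed(unsigned long len)
	{
		return TOY_DIV_ROUND_UP(len, TOY_SMB2_MAX_BUFFER_SIZE);
	}
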
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 2ae2dbb6202b..9fc5b11c0b6c 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4502,8 +4502,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
struct TCP_Server_Info *server = rdata->server;
struct smb2_hdr *shdr =
(struct smb2_hdr *)rdata->iov[0].iov_base;
- struct cifs_credits credits = { .value = 0, .instance = 0 };
+ struct cifs_credits credits = {
+ .value = 0,
+ .instance = 0,
+ .rreq_debug_id = rdata->rreq->debug_id,
+ .rreq_debug_index = rdata->subreq.debug_index,
+ };
struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1 };
+ unsigned int rreq_debug_id = rdata->rreq->debug_id;
+ unsigned int subreq_debug_index = rdata->subreq.debug_index;
if (rdata->got_bytes) {
rqst.rq_iter = rdata->subreq.io_iter;
@@ -4587,10 +4594,16 @@ smb2_readv_callback(struct mid_q_entry *mid)
if (rdata->subreq.start < rdata->subreq.rreq->i_size)
rdata->result = 0;
}
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
+ server->credits, server->in_flight,
+ 0, cifs_trace_rw_credits_read_response_clear);
rdata->credits.value = 0;
INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid);
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
+ server->credits, server->in_flight,
+ credits.value, cifs_trace_rw_credits_read_response_add);
add_credits(server, &credits, 0);
}
@@ -4647,7 +4660,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
min_t(int, server->max_credits -
server->credits, credit_request));
- rc = adjust_credits(server, &rdata->credits, rdata->subreq.len);
+ rc = adjust_credits(server, rdata, cifs_trace_rw_credits_call_readv_adjust);
if (rc)
goto async_readv_out;
@@ -4766,7 +4779,14 @@ smb2_writev_callback(struct mid_q_entry *mid)
struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink);
struct TCP_Server_Info *server = wdata->server;
struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
- struct cifs_credits credits = { .value = 0, .instance = 0 };
+ struct cifs_credits credits = {
+ .value = 0,
+ .instance = 0,
+ .rreq_debug_id = wdata->rreq->debug_id,
+ .rreq_debug_index = wdata->subreq.debug_index,
+ };
+ unsigned int rreq_debug_id = wdata->rreq->debug_id;
+ unsigned int subreq_debug_index = wdata->subreq.debug_index;
ssize_t result = 0;
size_t written;
@@ -4837,9 +4857,15 @@ smb2_writev_callback(struct mid_q_entry *mid)
tcon->tid, tcon->ses->Suid,
wdata->subreq.start, wdata->subreq.len);
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, wdata->credits.value,
+ server->credits, server->in_flight,
+ 0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
cifs_write_subrequest_terminated(wdata, result ?: written, true);
release_mid(mid);
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
+ server->credits, server->in_flight,
+ credits.value, cifs_trace_rw_credits_write_response_add);
add_credits(server, &credits, 0);
}
@@ -4859,9 +4885,6 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
struct cifs_io_parms *io_parms = NULL;
int credit_request;
- if (!wdata->server || test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags))
- server = wdata->server = cifs_pick_channel(tcon->ses);
-
/*
* in future we may get cifs_io_parms passed in from the caller,
* but for now we construct it here...
@@ -4972,7 +4995,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
min_t(int, server->max_credits -
server->credits, credit_request));
- rc = adjust_credits(server, &wdata->credits, io_parms->length);
+ rc = adjust_credits(server, wdata, cifs_trace_rw_credits_call_writev_adjust);
if (rc)
goto async_writev_out;
@@ -4997,6 +5020,12 @@ async_writev_out:
cifs_small_buf_release(req);
out:
if (rc) {
+ trace_smb3_rw_credits(wdata->rreq->debug_id,
+ wdata->subreq.debug_index,
+ wdata->credits.value,
+ server->credits, server->in_flight,
+ -(int)wdata->credits.value,
+ cifs_trace_rw_credits_write_response_clear);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
cifs_write_subrequest_terminated(wdata, rc, true);
}
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 36d47ce59631..36d5295c2a6f 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -20,6 +20,22 @@
/*
* Specify enums for tracing information.
*/
+#define smb3_rw_credits_traces \
+ EM(cifs_trace_rw_credits_call_readv_adjust, "rd-call-adj") \
+ EM(cifs_trace_rw_credits_call_writev_adjust, "wr-call-adj") \
+ EM(cifs_trace_rw_credits_free_subreq, "free-subreq") \
+ EM(cifs_trace_rw_credits_issue_read_adjust, "rd-issu-adj") \
+ EM(cifs_trace_rw_credits_issue_write_adjust, "wr-issu-adj") \
+ EM(cifs_trace_rw_credits_no_adjust_up, "no-adj-up ") \
+ EM(cifs_trace_rw_credits_old_session, "old-session") \
+ EM(cifs_trace_rw_credits_read_response_add, "rd-resp-add") \
+ EM(cifs_trace_rw_credits_read_response_clear, "rd-resp-clr") \
+ EM(cifs_trace_rw_credits_read_submit, "rd-submit ") \
+ EM(cifs_trace_rw_credits_write_prepare, "wr-prepare ") \
+ EM(cifs_trace_rw_credits_write_response_add, "wr-resp-add") \
+ EM(cifs_trace_rw_credits_write_response_clear, "wr-resp-clr") \
+ E_(cifs_trace_rw_credits_zero_in_flight, "ZERO-IN-FLT")
+
#define smb3_tcon_ref_traces \
EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \
EM(netfs_trace_tcon_ref_free, "FRE ") \
@@ -59,7 +75,8 @@
#define EM(a, b) a,
#define E_(a, b) a
-enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
+enum smb3_rw_credits_trace { smb3_rw_credits_traces } __mode(byte);
+enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
#undef EM
#undef E_
@@ -71,6 +88,7 @@ enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
+smb3_rw_credits_traces;
smb3_tcon_ref_traces;
#undef EM
@@ -1316,6 +1334,41 @@ TRACE_EVENT(smb3_tcon_ref,
__entry->ref)
);
+TRACE_EVENT(smb3_rw_credits,
+ TP_PROTO(unsigned int rreq_debug_id,
+ unsigned int subreq_debug_index,
+ unsigned int subreq_credits,
+ unsigned int server_credits,
+ int server_in_flight,
+ int credit_change,
+ enum smb3_rw_credits_trace trace),
+ TP_ARGS(rreq_debug_id, subreq_debug_index, subreq_credits,
+ server_credits, server_in_flight, credit_change, trace),
+ TP_STRUCT__entry(
+ __field(unsigned int, rreq_debug_id)
+ __field(unsigned int, subreq_debug_index)
+ __field(unsigned int, subreq_credits)
+ __field(unsigned int, server_credits)
+ __field(int, in_flight)
+ __field(int, credit_change)
+ __field(enum smb3_rw_credits_trace, trace)
+ ),
+ TP_fast_assign(
+ __entry->rreq_debug_id = rreq_debug_id;
+ __entry->subreq_debug_index = subreq_debug_index;
+ __entry->subreq_credits = subreq_credits;
+ __entry->server_credits = server_credits;
+ __entry->in_flight = server_in_flight;
+ __entry->credit_change = credit_change;
+ __entry->trace = trace;
+ ),
+ TP_printk("R=%08x[%x] %s cred=%u chg=%d pool=%u ifl=%d",
+ __entry->rreq_debug_id, __entry->subreq_debug_index,
+ __print_symbolic(__entry->trace, smb3_rw_credits_traces),
+ __entry->subreq_credits, __entry->credit_change,
+ __entry->server_credits, __entry->in_flight)
+ );
+
#undef EM
#undef E_
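
The EM()/E_() lists above are the X-macro idiom used throughout this file: one list, expanded once into enum constants and once into value/name pairs for __print_symbolic(). A standalone sketch of the same trick, with toy names:

	#define toy_traces \
		EM(toy_trace_submit, "submit") \
		E_(toy_trace_done,   "done")

	/* First expansion: the enum constants. */
	#define EM(a, b) a,
	#define E_(a, b) a
	enum toy_trace { toy_traces };
	#undef EM
	#undef E_

	/* Second expansion: value/name pairs, e.g. for a printer table. */
	#define EM(a, b) { a, b },
	#define E_(a, b) { a, b }
	static const struct { int val; const char *name; } toy_names[] = {
		toy_traces
	};
	#undef EM
	#undef E_
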
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 012b9bd06995..adfe0d058701 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -988,10 +988,10 @@ static void
cifs_compound_callback(struct mid_q_entry *mid)
{
struct TCP_Server_Info *server = mid->server;
- struct cifs_credits credits;
-
- credits.value = server->ops->get_credits(mid);
- credits.instance = server->reconnect_instance;
+ struct cifs_credits credits = {
+ .value = server->ops->get_credits(mid),
+ .instance = server->reconnect_instance,
+ };
add_credits(server, &credits, mid->optype);
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 0e04cf8b1d89..5c2845e47cf2 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -133,8 +133,8 @@ struct ksmbd_transport_ops {
};
struct ksmbd_transport {
- struct ksmbd_conn *conn;
- struct ksmbd_transport_ops *ops;
+ struct ksmbd_conn *conn;
+ const struct ksmbd_transport_ops *ops;
};
#define KSMBD_TCP_RECV_TIMEOUT (7 * HZ)
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index aec0a7a12405..162a12685d2c 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -149,6 +149,7 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
ksmbd_tree_conn_session_logoff(sess);
ksmbd_destroy_file_table(&sess->file_table);
+ ksmbd_launch_ksmbd_durable_scavenger();
ksmbd_session_rpc_clear_list(sess);
free_channel_list(sess);
kfree(sess->Preauth_HashValue);
@@ -326,6 +327,7 @@ void destroy_previous_session(struct ksmbd_conn *conn,
ksmbd_destroy_file_table(&prev_sess->file_table);
prev_sess->state = SMB2_SESSION_EXPIRED;
+ ksmbd_launch_ksmbd_durable_scavenger();
out:
up_write(&conn->session_lock);
up_write(&sessions_table_lock);
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index e9da63f25b20..72bc88a63a40 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -11,13 +11,6 @@
#define OPLOCK_WAIT_TIME (35 * HZ)
-/* SMB2 Oplock levels */
-#define SMB2_OPLOCK_LEVEL_NONE 0x00
-#define SMB2_OPLOCK_LEVEL_II 0x01
-#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
-#define SMB2_OPLOCK_LEVEL_BATCH 0x09
-#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
-
/* Oplock states */
#define OPLOCK_STATE_NONE 0x00
#define OPLOCK_ACK_WAIT 0x01
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index c67fbc8d6683..4d24cc105ef6 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -377,6 +377,7 @@ static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl)
{
ksmbd_ipc_soft_reset();
ksmbd_conn_transport_destroy();
+ ksmbd_stop_durable_scavenger();
server_conf_free();
server_conf_init();
WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h
index db7278181760..4fc529335271 100644
--- a/fs/smb/server/server.h
+++ b/fs/smb/server/server.h
@@ -44,6 +44,7 @@ struct ksmbd_server_config {
unsigned int max_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
+ struct task_struct *dh_task;
};
extern struct ksmbd_server_config server_conf;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 840c71c66b30..37a39ab4ee65 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -3526,7 +3526,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_GUID_SIZE);
if (dh_info.timeout)
fp->durable_timeout = min(dh_info.timeout,
- 300000);
+ DURABLE_HANDLE_MAX_TIMEOUT);
else
fp->durable_timeout = 60;
}
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 643f5e1cfe35..3be7d5ae65a8 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -72,6 +72,8 @@ struct create_durable_req_v2 {
__u8 CreateGuid[16];
} __packed;
+#define DURABLE_HANDLE_MAX_TIMEOUT 300000
+
struct create_durable_reconn_req {
struct create_context_hdr ccontext;
__u8 Name[8];
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 8faa25c6e129..cf4418f72772 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -164,7 +164,7 @@ enum {
SMB_DIRECT_MSG_DATA_TRANSFER
};
-static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
+static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
struct smb_direct_send_ctx {
struct list_head msg_list;
@@ -2292,7 +2292,7 @@ out:
return rdma_capable;
}
-static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
+static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
.shutdown = smb_direct_shutdown,
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 6633fa78e9b9..a84788396daa 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -37,7 +37,7 @@ struct tcp_transport {
unsigned int nr_iov;
};
-static struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
+static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
static void tcp_stop_kthread(struct task_struct *kthread);
static struct interface *alloc_iface(char *ifname);
@@ -649,7 +649,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
return 0;
}
-static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
+static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
.read = ksmbd_tcp_read,
.writev = ksmbd_tcp_writev,
.disconnect = ksmbd_tcp_disconnect,
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 8b2e37c8716e..4d4ee696e37c 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -8,6 +8,8 @@
#include <linux/filelock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include "glob.h"
#include "vfs_cache.h"
@@ -17,6 +19,7 @@
#include "mgmt/tree_connect.h"
#include "mgmt/user_session.h"
#include "smb_common.h"
+#include "server.h"
#define S_DEL_PENDING 1
#define S_DEL_ON_CLS 2
@@ -31,6 +34,10 @@ static struct ksmbd_file_table global_ft;
static atomic_long_t fd_limit;
static struct kmem_cache *filp_cache;
+static bool durable_scavenger_running;
+static DEFINE_MUTEX(durable_scavenger_lock);
+static wait_queue_head_t dh_wq;
+
void ksmbd_set_fd_limit(unsigned long limit)
{
limit = min(limit, get_max_files());
@@ -280,9 +287,16 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
if (!has_file_id(fp->persistent_id))
return;
- write_lock(&global_ft.lock);
idr_remove(global_ft.idr, fp->persistent_id);
+}
+
+static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+{
+ write_lock(&global_ft.lock);
+ __ksmbd_remove_durable_fd(fp);
write_unlock(&global_ft.lock);
+ if (waitqueue_active(&dh_wq))
+ wake_up(&dh_wq);
}
static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
@@ -305,7 +319,7 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
struct ksmbd_lock *smb_lock, *tmp_lock;
fd_limit_close();
- __ksmbd_remove_durable_fd(fp);
+ ksmbd_remove_durable_fd(fp);
if (ft)
__ksmbd_remove_fd(ft, fp);
@@ -477,7 +491,10 @@ struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
struct ksmbd_file *fp;
fp = __ksmbd_lookup_fd(&global_ft, id);
- if (fp && fp->conn) {
+ if (fp && (fp->conn ||
+ (fp->durable_scavenger_timeout &&
+ (fp->durable_scavenger_timeout <
+ jiffies_to_msecs(jiffies))))) {
ksmbd_put_durable_fd(fp);
fp = NULL;
}
@@ -694,6 +711,142 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
return fp->tcon != tcon;
}
+static bool ksmbd_durable_scavenger_alive(void)
+{
+ mutex_lock(&durable_scavenger_lock);
+ if (!durable_scavenger_running) {
+ mutex_unlock(&durable_scavenger_lock);
+ return false;
+ }
+ mutex_unlock(&durable_scavenger_lock);
+
+ if (kthread_should_stop())
+ return false;
+
+ if (idr_is_empty(global_ft.idr))
+ return false;
+
+ return true;
+}
+
+static void ksmbd_scavenger_dispose_dh(struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct ksmbd_file *fp;
+
+ fp = list_first_entry(head, struct ksmbd_file, node);
+ list_del_init(&fp->node);
+ __ksmbd_close_fd(NULL, fp);
+ }
+}
+
+static int ksmbd_durable_scavenger(void *dummy)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+ unsigned int min_timeout = 1;
+ bool found_fp_timeout;
+ LIST_HEAD(scavenger_list);
+ unsigned long remaining_jiffies;
+
+ __module_get(THIS_MODULE);
+
+ set_freezable();
+ while (ksmbd_durable_scavenger_alive()) {
+ if (try_to_freeze())
+ continue;
+
+ found_fp_timeout = false;
+
+ remaining_jiffies = wait_event_timeout(dh_wq,
+ ksmbd_durable_scavenger_alive() == false,
+ __msecs_to_jiffies(min_timeout));
+ if (remaining_jiffies)
+ min_timeout = jiffies_to_msecs(remaining_jiffies);
+ else
+ min_timeout = DURABLE_HANDLE_MAX_TIMEOUT;
+
+ write_lock(&global_ft.lock);
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ if (!fp->durable_timeout)
+ continue;
+
+ if (atomic_read(&fp->refcount) > 1 ||
+ fp->conn)
+ continue;
+
+ found_fp_timeout = true;
+ if (fp->durable_scavenger_timeout <=
+ jiffies_to_msecs(jiffies)) {
+ __ksmbd_remove_durable_fd(fp);
+ list_add(&fp->node, &scavenger_list);
+ } else {
+ unsigned long durable_timeout;
+
+ durable_timeout =
+ fp->durable_scavenger_timeout -
+ jiffies_to_msecs(jiffies);
+
+ if (min_timeout > durable_timeout)
+ min_timeout = durable_timeout;
+ }
+ }
+ write_unlock(&global_ft.lock);
+
+ ksmbd_scavenger_dispose_dh(&scavenger_list);
+
+ if (found_fp_timeout == false)
+ break;
+ }
+
+ mutex_lock(&durable_scavenger_lock);
+ durable_scavenger_running = false;
+ mutex_unlock(&durable_scavenger_lock);
+
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+void ksmbd_launch_ksmbd_durable_scavenger(void)
+{
+ if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE))
+ return;
+
+ mutex_lock(&durable_scavenger_lock);
+ if (durable_scavenger_running == true) {
+ mutex_unlock(&durable_scavenger_lock);
+ return;
+ }
+
+ durable_scavenger_running = true;
+
+ server_conf.dh_task = kthread_run(ksmbd_durable_scavenger,
+ (void *)NULL, "ksmbd-durable-scavenger");
+ if (IS_ERR(server_conf.dh_task))
+ pr_err("cannot start conn thread, err : %ld\n",
+ PTR_ERR(server_conf.dh_task));
+ mutex_unlock(&durable_scavenger_lock);
+}
+
+void ksmbd_stop_durable_scavenger(void)
+{
+ if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE))
+ return;
+
+ mutex_lock(&durable_scavenger_lock);
+ if (!durable_scavenger_running) {
+ mutex_unlock(&durable_scavenger_lock);
+ return;
+ }
+
+ durable_scavenger_running = false;
+ if (waitqueue_active(&dh_wq))
+ wake_up(&dh_wq);
+ mutex_unlock(&durable_scavenger_lock);
+ kthread_stop(server_conf.dh_task);
+}
+
static bool session_fd_check(struct ksmbd_tree_connect *tcon,
struct ksmbd_file *fp)
{
@@ -718,6 +871,10 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon,
fp->tcon = NULL;
fp->volatile_id = KSMBD_NO_FID;
+ if (fp->durable_timeout)
+ fp->durable_scavenger_timeout =
+ jiffies_to_msecs(jiffies) + fp->durable_timeout;
+
return true;
}
@@ -750,11 +907,12 @@ void ksmbd_free_global_file_table(void)
unsigned int id;
idr_for_each_entry(global_ft.idr, fp, id) {
- __ksmbd_remove_durable_fd(fp);
- kmem_cache_free(filp_cache, fp);
+ ksmbd_remove_durable_fd(fp);
+ __ksmbd_close_fd(NULL, fp);
}
- ksmbd_destroy_file_table(&global_ft);
+ idr_destroy(global_ft.idr);
+ kfree(global_ft.idr);
}
int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share,
@@ -810,6 +968,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp)
}
up_write(&ci->m_lock);
+ fp->f_state = FP_NEW;
__open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
if (!has_file_id(fp->volatile_id)) {
fp->conn = NULL;
@@ -849,6 +1008,8 @@ int ksmbd_init_file_cache(void)
if (!filp_cache)
goto out;
+ init_waitqueue_head(&dh_wq);
+
return 0;
out:
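
The scavenger body reduces to the standard freezable-kthread pattern: sleep on a waitqueue with the shortest pending deadline as the timeout, then rescan. A skeleton under those assumptions (toy waitqueue; the real loop recomputes min_timeout from the durable_scavenger_timeout deadlines):

	#include <linux/freezer.h>
	#include <linux/jiffies.h>
	#include <linux/kthread.h>
	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(toy_wq);

	/* Freezable kthread: wake on table changes or when the nearest
	 * deadline elapses, then rescan the handle table. */
	static int toy_scavenger(void *unused)
	{
		unsigned int min_timeout = 1;

		set_freezable();
		while (!kthread_should_stop()) {
			if (try_to_freeze())
				continue;

			wait_event_timeout(toy_wq, kthread_should_stop(),
					   msecs_to_jiffies(min_timeout));
			/*
			 * ... dispose entries whose absolute msec deadline
			 * (jiffies_to_msecs(jiffies) + timeout at disconnect)
			 * has passed; set min_timeout to the nearest
			 * remaining deadline ...
			 */
			min_timeout = 300000;	/* cf. DURABLE_HANDLE_MAX_TIMEOUT */
		}
		return 0;
	}
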
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 5a225e7055f1..b0f6d0f94cb8 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -101,6 +101,7 @@ struct ksmbd_file {
struct list_head lock_list;
int durable_timeout;
+ int durable_scavenger_timeout;
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
@@ -152,6 +153,8 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
+void ksmbd_launch_ksmbd_durable_scavenger(void);
+void ksmbd_stop_durable_scavenger(void);
void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
void ksmbd_close_session_fds(struct ksmbd_work *work);
int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 44666afc6209..bc625788589c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1540,4 +1540,5 @@ static void __exit exit_ufs_fs(void)
module_init(init_ufs_fs)
module_exit(exit_ufs_fs)
+MODULE_DESCRIPTION("UFS Filesystem");
MODULE_LICENSE("GPL");
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 17e409ceaa33..27a3e9285fbf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -257,7 +257,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
goto out;
ret = false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep);
/*
* Lockless access: we're in a wait_event so it's ok if it