aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/cmservice.c6
-rw-r--r--fs/afs/rxrpc.c24
-rw-r--r--fs/aio.c4
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c4
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/fs.h6
-rw-r--r--fs/btrfs/qgroup.c39
-rw-r--r--fs/btrfs/raid56.c14
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/space-info.c3
-rw-r--r--fs/btrfs/tree-log.c70
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c83
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/ceph/addr.c17
-rw-r--r--fs/ceph/caps.c16
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/mds_client.c36
-rw-r--r--fs/ceph/snap.c36
-rw-r--r--fs/ceph/super.h11
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dfs_cache.c244
-rw-r--r--fs/cifs/dfs_cache.h3
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/link.c1
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb1ops.c63
-rw-r--r--fs/cifs/smb2pdu.c21
-rw-r--r--fs/cifs/smbdirect.c1
-rw-r--r--fs/coredump.c48
-rw-r--r--fs/dax.c5
-rw-r--r--fs/erofs/super.c13
-rw-r--r--fs/erofs/zdata.c12
-rw-r--r--fs/erofs/zmap.c10
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/freevxfs/Kconfig2
-rw-r--r--fs/fscache/volume.c14
-rw-r--r--fs/fuse/acl.c68
-rw-r--r--fs/fuse/dir.c6
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c21
-rw-r--r--fs/fuse/xattr.c51
-rw-r--r--fs/gfs2/log.c11
-rw-r--r--fs/ksmbd/auth.c3
-rw-r--r--fs/ksmbd/connection.c24
-rw-r--r--fs/ksmbd/ksmbd_netlink.h3
-rw-r--r--fs/ksmbd/ndr.c8
-rw-r--r--fs/ksmbd/server.h1
-rw-r--r--fs/ksmbd/smb2pdu.c9
-rw-r--r--fs/ksmbd/smb2pdu.h5
-rw-r--r--fs/ksmbd/transport_ipc.c3
-rw-r--r--fs/ksmbd/transport_tcp.c22
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfsd/filecache.c101
-rw-r--r--fs/nfsd/filecache.h5
-rw-r--r--fs/nfsd/netns.h2
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4state.c48
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfsctl.c7
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsproc.c4
-rw-r--r--fs/nfsd/trace.h52
-rw-r--r--fs/nilfs2/btree.c15
-rw-r--r--fs/nilfs2/ioctl.c7
-rw-r--r--fs/nilfs2/super.c9
-rw-r--r--fs/nilfs2/the_nilfs.c8
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/squashfs/squashfs_fs.h2
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c2
-rw-r--r--fs/userfaultfd.c28
-rw-r--r--fs/zonefs/super.c22
81 files changed, 915 insertions, 585 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index cefa222f7881..8daeed31e1af 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -880,7 +880,7 @@ affs_truncate(struct inode *inode)
if (inode->i_size > AFFS_I(inode)->mmu_private) {
struct address_space *mapping = inode->i_mapping;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
loff_t isize = inode->i_size;
int res;
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 7dcd59693a0c..d4ddb20d6732 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -13,6 +13,8 @@
#include "internal.h"
#include "afs_cm.h"
#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
static int afs_deliver_cb_init_call_back_state(struct afs_call *);
static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call)
* Abort a service call from within an action function.
*/
static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
- const char *why)
+ enum rxrpc_abort_reason why)
{
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, error, why);
@@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
afs_send_empty_reply(call);
else
- afs_abort_service_call(call, 1, 1, "K-1");
+ afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative);
afs_put_call(call);
_leave("");
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c62939e5ea1f..7817e2b860e5 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -13,6 +13,8 @@
#include "internal.h"
#include "afs_cm.h"
#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
struct workqueue_struct *afs_async_calls;
@@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
error_do_abort:
if (ret != -ECONNABORTED) {
rxrpc_kernel_abort_call(call->net->socket, rxcall,
- RX_USER_ABORT, ret, "KSD");
+ RX_USER_ABORT, ret,
+ afs_abort_send_data_error);
} else {
len = 0;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
@@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KIV");
+ abort_code, ret,
+ afs_abort_op_not_supported);
goto local_abort;
case -EIO:
pr_err("kAFS: Call %u in bad state %u\n",
@@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KUM");
+ abort_code, ret,
+ afs_abort_unmarshal_error);
goto local_abort;
default:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, ret, "KER");
+ abort_code, ret,
+ afs_abort_general_error);
goto local_abort;
}
}
@@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
/* Kill off the call if it's still live. */
_debug("call interrupted");
if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI"))
+ RX_USER_ABORT, -EINTR,
+ afs_abort_interrupted))
afs_set_call_complete(call, -EINTR, 0);
}
}
@@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call)
case -ENOMEM:
_debug("oom");
rxrpc_kernel_abort_call(net->socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ afs_abort_oom);
fallthrough;
default:
_leave(" [error]");
@@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
rxrpc_kernel_abort_call(net->socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ afs_abort_oom);
}
_leave(" [error]");
}
@@ -900,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more)
ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
&call->iov_len, want_more, &remote_abort,
&call->service_id);
+ trace_afs_receive_data(call, call->iter, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
diff --git a/fs/aio.c b/fs/aio.c
index 562916d85cba..e85ba0b77f59 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
+ if (!table)
+ goto out_unlock;
+
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
@@ -374,6 +377,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
}
}
+out_unlock:
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
return res;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index de63572a9404..9a780fafc539 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm)
* The number of segs are recored into ELF header as 16bit value.
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
*/
- segs = cprm->vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
/* for notes section */
segs++;
@@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
offset += cprm->vma_data_size;
- offset += elf_core_extra_data_size();
+ offset += elf_core_extra_data_size(cprm);
e_shoff = offset;
if (e_phnum == PN_XNUM) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 096e3520a0b1..a05eafcacfb2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
tmp->next = thread_list;
thread_list = tmp;
- segs = cprm->vma_count + elf_core_extra_phdrs();
+ segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
/* for notes section */
segs++;
@@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
offset += cprm->vma_data_size;
- offset += elf_core_extra_data_size();
+ offset += elf_core_extra_data_size(cprm);
e_shoff = offset;
if (e_phnum == PN_XNUM) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8aeaada1fcae..3aa04224315e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -367,7 +367,14 @@ error:
btrfs_print_tree(eb, 0);
btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
eb->start);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ /*
+ * Be noisy if this is an extent buffer from a log tree. We don't abort
+ * a transaction in case there's a bad log tree extent buffer, we just
+ * fallback to a transaction commit. Still we want to know when there is
+ * a bad log tree extent buffer, as that may signal a bug somewhere.
+ */
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
+ btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9bd32daa9b9a..3bbf8703db2a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3826,6 +3826,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
prev_extent_end = lockstart;
+ btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
@@ -4019,6 +4020,7 @@ check_eof_delalloc:
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);
btrfs_free_backref_share_ctx(backref_ctx);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 834bbcb91102..af046d22300e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *extent;
u64 extent_end;
+ u8 type;
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
@@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
extent = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
+ type = btrfs_file_extent_type(leaf, extent);
- if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 ||
- btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_PREALLOC) {
+ /*
+ * Can't access the extent's disk_bytenr field if this is an
+ * inline extent, since at that offset, it's where the extent
+ * data starts.
+ */
+ if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+ (type == BTRFS_FILE_EXTENT_REG &&
+ btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) {
/*
* Explicit hole or prealloc extent, search for delalloc.
* A prealloc extent is treated like a hole.
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index a749367e5ae2..37b86acfcbcf 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -119,6 +119,12 @@ enum {
/* Indicate that we want to commit the transaction. */
BTRFS_FS_NEED_TRANS_COMMIT,
+ /*
+ * Indicate metadata over-commit is disabled. This is set when active
+ * zone tracking is needed.
+ */
+ BTRFS_FS_NO_OVERCOMMIT,
+
#if BITS_PER_LONG == 32
/* Indicate if we have error/warn message printed on 32bit systems */
BTRFS_FS_32BIT_ERROR,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d275bf24b250..af97413abcf4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2765,9 +2765,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
/*
* Old roots should be searched when inserting qgroup
- * extent record
+ * extent record.
+ *
+ * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case,
+ * we may have some record inserted during
+ * NO_ACCOUNTING (thus no old_roots populated), but
+ * later we start rescan, which clears NO_ACCOUNTING,
+ * leaving some inserted records without old_roots
+ * populated.
+ *
+ * Those cases are rare and should not cause too much
+ * time spent during commit_transaction().
*/
- if (WARN_ON(!record->old_roots)) {
+ if (!record->old_roots) {
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(&ctx, false);
if (ret < 0)
@@ -3357,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
int err = -ENOMEM;
int ret = 0;
bool stopped = false;
+ bool did_leaf_rescans = false;
path = btrfs_alloc_path();
if (!path)
@@ -3377,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
}
err = qgroup_rescan_leaf(trans, path);
+ did_leaf_rescans = true;
if (err > 0)
btrfs_commit_transaction(trans);
@@ -3397,16 +3409,23 @@ out:
mutex_unlock(&fs_info->qgroup_rescan_lock);
/*
- * only update status, since the previous part has already updated the
- * qgroup info.
+ * Only update status, since the previous part has already updated the
+ * qgroup info, and only if we did any actual work. This also prevents
+ * race with a concurrent quota disable, which has already set
+ * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
+ * btrfs_quota_disable().
*/
- trans = btrfs_start_transaction(fs_info->quota_root, 1);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ if (did_leaf_rescans) {
+ trans = btrfs_start_transaction(fs_info->quota_root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ trans = NULL;
+ btrfs_err(fs_info,
+ "fail to start transaction for status update: %d",
+ err);
+ }
+ } else {
trans = NULL;
- btrfs_err(fs_info,
- "fail to start transaction for status update: %d",
- err);
}
mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6a2cf754912d..ff4b1d583788 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1426,12 +1426,20 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi
u32 bio_size = 0;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ int i;
bio_for_each_segment_all(bvec, bio, iter_all)
bio_size += bvec->bv_len;
- bitmap_set(rbio->error_bitmap, total_sector_nr,
- bio_size >> rbio->bioc->fs_info->sectorsize_bits);
+ /*
+ * Since we can have multiple bios touching the error_bitmap, we cannot
+ * call bitmap_set() without protection.
+ *
+ * Instead use set_bit() for each bit, as set_bit() itself is atomic.
+ */
+ for (i = total_sector_nr; i < total_sector_nr +
+ (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++)
+ set_bit(i, rbio->error_bitmap);
}
/* Verify the data sectors at read time. */
@@ -1886,7 +1894,7 @@ pstripe:
sector->uptodate = 1;
}
if (failb >= 0) {
- ret = verify_one_sector(rbio, faila, sector_nr);
+ ret = verify_one_sector(rbio, failb, sector_nr);
if (ret < 0)
goto cleanup;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e65e6b6600a7..d50182b6deec 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8073,10 +8073,10 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
/*
* Check that we don't overflow at later allocations, we request
* clone_sources_count + 1 items, and compare to unsigned long inside
- * access_ok.
+ * access_ok. Also set an upper limit for allocation size so this can't
+ * easily exhaust memory. Max number of clone sources is about 200K.
*/
- if (arg->clone_sources_count >
- ULONG_MAX / sizeof(struct clone_root) - 1) {
+ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
ret = -EINVAL;
goto out;
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d28ee4e36f3d..69c09508afb5 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -407,7 +407,8 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;
used = btrfs_space_info_used(space_info, true);
- if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+ if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
avail = 0;
else
avail = calc_available_free_space(fs_info, space_info, flush);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb52aa060093..58599189bd18 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2980,7 +2980,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = 0;
if (ret) {
blk_finish_plug(&plug);
- btrfs_abort_transaction(trans, ret);
btrfs_set_log_full_commit(trans);
mutex_unlock(&root->log_mutex);
goto out;
@@ -3045,15 +3044,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
blk_finish_plug(&plug);
btrfs_set_log_full_commit(trans);
-
- if (ret != -ENOSPC) {
- btrfs_abort_transaction(trans, ret);
- mutex_unlock(&log_root_tree->log_mutex);
- goto out;
- }
+ if (ret != -ENOSPC)
+ btrfs_err(fs_info,
+ "failed to update log for root %llu ret %d",
+ root->root_key.objectid, ret);
btrfs_wait_tree_log_extents(log, mark);
mutex_unlock(&log_root_tree->log_mutex);
- ret = BTRFS_LOG_FORCE_COMMIT;
goto out;
}
@@ -3112,7 +3108,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
} else if (ret) {
btrfs_set_log_full_commit(trans);
- btrfs_abort_transaction(trans, ret);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3581,17 +3576,19 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
}
static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
+ struct btrfs_inode *inode,
struct extent_buffer *src,
struct btrfs_path *dst_path,
int start_slot,
int count)
{
+ struct btrfs_root *log = inode->root->log_root;
char *ins_data = NULL;
struct btrfs_item_batch batch;
struct extent_buffer *dst;
unsigned long src_offset;
unsigned long dst_offset;
+ u64 last_index;
struct btrfs_key key;
u32 item_size;
int ret;
@@ -3649,6 +3646,19 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans,
src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1);
copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size);
btrfs_release_path(dst_path);
+
+ last_index = batch.keys[count - 1].offset;
+ ASSERT(last_index > inode->last_dir_index_offset);
+
+ /*
+ * If for some unexpected reason the last item's index is not greater
+ * than the last index we logged, warn and return an error to fallback
+ * to a transaction commit.
+ */
+ if (WARN_ON(last_index <= inode->last_dir_index_offset))
+ ret = -EUCLEAN;
+ else
+ inode->last_dir_index_offset = last_index;
out:
kfree(ins_data);
@@ -3698,7 +3708,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
}
di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
- ctx->last_dir_item_offset = key.offset;
/*
* Skip ranges of items that consist only of dir item keys created
@@ -3761,7 +3770,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
if (batch_size > 0) {
int ret;
- ret = flush_dir_items_batch(trans, log, src, dst_path,
+ ret = flush_dir_items_batch(trans, inode, src, dst_path,
batch_start, batch_size);
if (ret < 0)
return ret;
@@ -3826,7 +3835,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->slots[0]);
if (tmp.type == BTRFS_DIR_INDEX_KEY)
last_old_dentry_offset = tmp.offset;
+ } else if (ret < 0) {
+ err = ret;
}
+
goto done;
}
@@ -3846,19 +3858,34 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
*/
if (tmp.type == BTRFS_DIR_INDEX_KEY)
last_old_dentry_offset = tmp.offset;
+ } else if (ret < 0) {
+ err = ret;
+ goto done;
}
+
btrfs_release_path(path);
/*
- * Find the first key from this transaction again. See the note for
- * log_new_dir_dentries, if we're logging a directory recursively we
- * won't be holding its i_mutex, which means we can modify the directory
- * while we're logging it. If we remove an entry between our first
- * search and this search we'll not find the key again and can just
- * bail.
+ * Find the first key from this transaction again or the one we were at
+ * in the loop below in case we had to reschedule. We may be logging the
+ * directory without holding its VFS lock, which happen when logging new
+ * dentries (through log_new_dir_dentries()) or in some cases when we
+ * need to log the parent directory of an inode. This means a dir index
+ * key might be deleted from the inode's root, and therefore we may not
+ * find it anymore. If we can't find it, just move to the next key. We
+ * can not bail out and ignore, because if we do that we will simply
+ * not log dir index keys that come after the one that was just deleted
+ * and we can end up logging a dir index range that ends at (u64)-1
+ * (@last_offset is initialized to that), resulting in removing dir
+ * entries we should not remove at log replay time.
*/
search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
+ if (ret > 0)
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ err = ret;
+ /* If ret is 1, there are no more keys in the inode's root. */
if (ret != 0)
goto done;
@@ -4031,7 +4058,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
min_key = BTRFS_DIR_START_INDEX;
max_key = 0;
- ctx->last_dir_item_offset = inode->last_dir_index_offset;
while (1) {
ret = log_dir_items(trans, inode, path, dst_path,
@@ -4043,8 +4069,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
min_key = max_key + 1;
}
- inode->last_dir_index_offset = ctx->last_dir_item_offset;
-
return 0;
}
@@ -5580,8 +5604,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
* LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction
* commits.
*/
- if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES)
+ if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) {
+ btrfs_set_log_full_commit(trans);
return BTRFS_LOG_FORCE_COMMIT;
+ }
inode = btrfs_iget(root->fs_info->sb, ino, root);
/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 85b43075ac58..85cd24cb0540 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -24,8 +24,6 @@ struct btrfs_log_ctx {
bool logging_new_delayed_dentries;
/* Indicate if the inode being logged was logged before. */
bool logged_before;
- /* Tracks the last logged dir item/index key offset. */
- u64 last_dir_item_offset;
struct inode *inode;
struct list_head list;
/* Only used for fast fsyncs. */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index aa25fa335d3e..df43093b7a46 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -403,6 +403,7 @@ void btrfs_free_device(struct btrfs_device *device)
static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device;
+
WARN_ON(fs_devices->opened);
while (!list_empty(&fs_devices->devices)) {
device = list_entry(fs_devices->devices.next,
@@ -768,8 +769,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
error = lookup_bdev(path, &path_devt);
- if (error)
+ if (error) {
+ btrfs_err(NULL, "failed to lookup block device for path %s: %d",
+ path, error);
return ERR_PTR(error);
+ }
if (fsid_change_in_progress) {
if (!has_metadata_uuid)
@@ -836,6 +840,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
unsigned int nofs_flag;
if (fs_devices->opened) {
+ btrfs_err(NULL,
+ "device %s belongs to fsid %pU, and the fs is already mounted",
+ path, fs_devices->fsid);
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
}
@@ -905,6 +912,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* generation are equal.
*/
mutex_unlock(&fs_devices->device_list_mutex);
+ btrfs_err(NULL,
+"device %s already registered with a higher generation, found %llu expect %llu",
+ path, found_transid, device->generation);
return ERR_PTR(-EEXIST);
}
@@ -1172,9 +1182,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&uuid_mutex);
close_fs_devices(fs_devices);
- if (!fs_devices->opened)
+ if (!fs_devices->opened) {
list_splice_init(&fs_devices->seed_list, &list);
+ /*
+ * If the struct btrfs_fs_devices is not assembled with any
+ * other device, it can be re-initialized during the next mount
+ * without the needing device-scan step. Therefore, it can be
+ * fully freed.
+ */
+ if (fs_devices->num_devices == 1) {
+ list_del(&fs_devices->fs_list);
+ free_fs_devices(fs_devices);
+ }
+ }
+
+
list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
close_fs_devices(fs_devices);
list_del(&fs_devices->seed_list);
@@ -1591,7 +1614,7 @@ again:
if (ret < 0)
goto out;
- while (1) {
+ while (search_start < search_end) {
l = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) {
@@ -1614,6 +1637,9 @@ again:
if (key.type != BTRFS_DEV_EXTENT_KEY)
goto next;
+ if (key.offset > search_end)
+ break;
+
if (key.offset > search_start) {
hole_size = key.offset - search_start;
dev_extent_hole_check(device, &search_start, &hole_size,
@@ -1674,6 +1700,7 @@ next:
else
ret = 0;
+ ASSERT(max_hole_start + max_hole_size <= search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -2005,42 +2032,42 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
}
+static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev, int copy_num)
+{
+ struct btrfs_super_block *disk_super;
+ const size_t len = sizeof(disk_super->magic);
+ const u64 bytenr = btrfs_sb_offset(copy_num);
+ int ret;
+
+ disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr);
+ if (IS_ERR(disk_super))
+ return;
+
+ memset(&disk_super->magic, 0, len);
+ folio_mark_dirty(virt_to_folio(disk_super));
+ btrfs_release_disk_super(disk_super);
+
+ ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1);
+ if (ret)
+ btrfs_warn(fs_info, "error clearing superblock number %d (%d)",
+ copy_num, ret);
+}
+
void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct block_device *bdev,
const char *device_path)
{
- struct btrfs_super_block *disk_super;
int copy_num;
if (!bdev)
return;
for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
- struct page *page;
- int ret;
-
- disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
- if (IS_ERR(disk_super))
- continue;
-
- if (bdev_is_zoned(bdev)) {
+ if (bdev_is_zoned(bdev))
btrfs_reset_sb_log_zones(bdev, copy_num);
- continue;
- }
-
- memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-
- page = virt_to_page(disk_super);
- set_page_dirty(page);
- lock_page(page);
- /* write_on_page() unlocks the page */
- ret = write_one_page(page);
- if (ret)
- btrfs_warn(fs_info,
- "error clearing superblock number %d (%d)",
- copy_num, ret);
- btrfs_release_disk_super(disk_super);
-
+ else
+ btrfs_scratch_superblock(fs_info, bdev, copy_num);
}
/* Notify udev that device has changed */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 01a13de11832..da7bb9187b68 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
- workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL);
+ workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL);
workspace->level = level;
workspace->buf = NULL;
/*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a759668477bb..1f503e8e42d4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -539,6 +539,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
atomic_set(&zone_info->active_zones_left,
max_active_zones - nactive);
+ /* Overcommit does not work well with active zone tacking. */
+ set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
}
/* Validate superblock log */
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c74871e37c9..cac4083e387a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -305,7 +305,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_request *req;
+ struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode);
struct iov_iter iter;
struct page **pages;
@@ -313,6 +313,11 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
int err = 0;
u64 len = subreq->len;
+ if (ceph_inode_is_shutdown(inode)) {
+ err = -EIO;
+ goto out;
+ }
+
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
@@ -563,6 +568,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p idx %lu\n", page, page->index);
+ if (ceph_inode_is_shutdown(inode))
+ return -EIO;
+
/* verify this is a writeable snap context */
snapc = page_snap_context(page);
if (!snapc) {
@@ -1643,7 +1651,7 @@ int ceph_uninline_data(struct file *file)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req = NULL;
- struct ceph_cap_flush *prealloc_cf;
+ struct ceph_cap_flush *prealloc_cf = NULL;
struct folio *folio = NULL;
u64 inline_version = CEPH_INLINE_NONE;
struct page *pages[1];
@@ -1657,6 +1665,11 @@ int ceph_uninline_data(struct file *file)
dout("uninline_data %p %llx.%llx inline_version %llu\n",
inode, ceph_vinop(inode), inline_version);
+ if (ceph_inode_is_shutdown(inode)) {
+ err = -EIO;
+ goto out;
+ }
+
if (inline_version == CEPH_INLINE_NONE)
return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2ce6cc443b48..7cc20772eac9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4079,6 +4079,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
void *p, *end;
struct cap_extra_info extra_info = {};
bool queue_trunc;
+ bool close_sessions = false;
dout("handle_caps from mds%d\n", session->s_mds);
@@ -4216,9 +4217,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
realm = NULL;
if (snaptrace_len) {
down_write(&mdsc->snap_rwsem);
- ceph_update_snap_trace(mdsc, snaptrace,
- snaptrace + snaptrace_len,
- false, &realm);
+ if (ceph_update_snap_trace(mdsc, snaptrace,
+ snaptrace + snaptrace_len,
+ false, &realm)) {
+ up_write(&mdsc->snap_rwsem);
+ close_sessions = true;
+ goto done;
+ }
downgrade_write(&mdsc->snap_rwsem);
} else {
down_read(&mdsc->snap_rwsem);
@@ -4278,6 +4283,11 @@ done_unlocked:
iput(inode);
out:
ceph_put_string(extra_info.pool_ns);
+
+ /* Defer closing the sessions after s_mutex lock being released */
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
+
return;
flush_cap_releases:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 764598e1efd9..b5cff85925a1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2011,6 +2011,9 @@ static int ceph_zero_partial_object(struct inode *inode,
loff_t zero = 0;
int op;
+ if (ceph_inode_is_shutdown(inode))
+ return -EIO;
+
if (!length) {
op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
length = &zero;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 26a0a8b9975e..27a245d959c0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -806,6 +806,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{
struct ceph_mds_session *s;
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
+ return ERR_PTR(-EIO);
+
if (mds >= mdsc->mdsmap->possible_max_rank)
return ERR_PTR(-EINVAL);
@@ -1478,6 +1481,9 @@ static int __open_session(struct ceph_mds_client *mdsc,
int mstate;
int mds = session->s_mds;
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO)
+ return -EIO;
+
/* wait for mds to go active? */
mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
dout("open_session to mds%d (%s)\n", mds,
@@ -2860,6 +2866,11 @@ static void __do_request(struct ceph_mds_client *mdsc,
return;
}
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) {
+ dout("do_request metadata corrupted\n");
+ err = -EIO;
+ goto finish;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
dout("do_request timed out\n");
@@ -3245,6 +3256,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
u64 tid;
int err, result;
int mds = session->s_mds;
+ bool close_sessions = false;
if (msg->front.iov_len < sizeof(*head)) {
pr_err("mdsc_handle_reply got corrupt (short) reply\n");
@@ -3351,10 +3363,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
realm = NULL;
if (rinfo->snapblob_len) {
down_write(&mdsc->snap_rwsem);
- ceph_update_snap_trace(mdsc, rinfo->snapblob,
+ err = ceph_update_snap_trace(mdsc, rinfo->snapblob,
rinfo->snapblob + rinfo->snapblob_len,
le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
&realm);
+ if (err) {
+ up_write(&mdsc->snap_rwsem);
+ close_sessions = true;
+ if (err == -EIO)
+ ceph_msg_dump(msg);
+ goto out_err;
+ }
downgrade_write(&mdsc->snap_rwsem);
} else {
down_read(&mdsc->snap_rwsem);
@@ -3412,6 +3431,10 @@ out_err:
req->r_end_latency, err);
out:
ceph_mdsc_put_request(req);
+
+ /* Defer closing the sessions after s_mutex lock being released */
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
return;
}
@@ -3662,6 +3685,12 @@ static void handle_session(struct ceph_mds_session *session,
break;
case CEPH_SESSION_FLUSHMSG:
+ /* flush cap releases */
+ spin_lock(&session->s_cap_lock);
+ if (session->s_num_cap_releases)
+ ceph_flush_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
+
send_flushmsg_ack(mdsc, session, seq);
break;
@@ -5011,7 +5040,7 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
}
/*
- * called after sb is ro.
+ * called after sb is ro or when metadata corrupted.
*/
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
@@ -5301,7 +5330,8 @@ static void mds_peer_reset(struct ceph_connection *con)
struct ceph_mds_client *mdsc = s->s_mdsc;
pr_warn("mds%d closed our session\n", s->s_mds);
- send_mds_reconnect(mdsc, s);
+ if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
+ send_mds_reconnect(mdsc, s);
}
static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e4151852184e..87007203f130 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
+#include <linux/fs.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
@@ -766,8 +767,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
+ struct ceph_client *client = mdsc->fsc->client;
int rebuild_snapcs;
int err = -ENOMEM;
+ int ret;
LIST_HEAD(dirty_realms);
lockdep_assert_held_write(&mdsc->snap_rwsem);
@@ -884,6 +887,27 @@ fail:
if (first_realm)
ceph_put_snap_realm(mdsc, first_realm);
pr_err("%s error %d\n", __func__, err);
+
+ /*
+ * When receiving a corrupted snap trace we don't know what
+ * exactly has happened in MDS side. And we shouldn't continue
+ * writing to OSD, which may corrupt the snapshot contents.
+ *
+ * Just try to blocklist this kclient and then this kclient
+ * must be remounted to continue after the corrupted metadata
+ * fixed in the MDS side.
+ */
+ WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO);
+ ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr);
+ if (ret)
+ pr_err("%s failed to blocklist %s: %d\n", __func__,
+ ceph_pr_addr(&client->msgr.inst.addr), ret);
+
+ WARN(1, "%s: %s%sdo remount to continue%s",
+ __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr),
+ ret ? "" : " was blocklisted, ",
+ err == -EIO ? " after corrupted snaptrace is fixed" : "");
+
return err;
}
@@ -984,6 +1008,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
__le64 *split_inos = NULL, *split_realms = NULL;
int i;
int locked_rwsem = 0;
+ bool close_sessions = false;
/* decode */
if (msg->front.iov_len < sizeof(*h))
@@ -1092,8 +1117,12 @@ skip_inode:
* update using the provided snap trace. if we are deleting a
* snap, we can avoid queueing cap_snaps.
*/
- ceph_update_snap_trace(mdsc, p, e,
- op == CEPH_SNAP_OP_DESTROY, NULL);
+ if (ceph_update_snap_trace(mdsc, p, e,
+ op == CEPH_SNAP_OP_DESTROY,
+ NULL)) {
+ close_sessions = true;
+ goto bad;
+ }
if (op == CEPH_SNAP_OP_SPLIT)
/* we took a reference when we created the realm, above */
@@ -1112,6 +1141,9 @@ bad:
out:
if (locked_rwsem)
up_write(&mdsc->snap_rwsem);
+
+ if (close_sessions)
+ ceph_mdsc_close_sessions(mdsc);
return;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 0ed3be75bb9a..07c6906cda70 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -100,6 +100,17 @@ struct ceph_mount_options {
char *mon_addr;
};
+/* mount state */
+enum {
+ CEPH_MOUNT_MOUNTING,
+ CEPH_MOUNT_MOUNTED,
+ CEPH_MOUNT_UNMOUNTING,
+ CEPH_MOUNT_UNMOUNTED,
+ CEPH_MOUNT_SHUTDOWN,
+ CEPH_MOUNT_RECOVER,
+ CEPH_MOUNT_FENCE_IO,
+};
+
#define CEPH_ASYNC_CREATE_CONFLICT_BITS 8
struct ceph_fs_client {
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 5db73c0f792a..cbc18b4a9cb2 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -278,6 +278,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp)
* ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) +
* unicode length of a netbios domain name
*/
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.len = size + 2 * dlen;
ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
if (!ses->auth_key.response) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d371259d6808..b2a04b4e89a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2606,11 +2606,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
INIT_LIST_HEAD(&tcon->pending_opens);
tcon->status = TID_GOOD;
- /* schedule query interfaces poll */
INIT_DELAYED_WORK(&tcon->query_interfaces,
smb2_query_server_interfaces);
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ if (ses->server->dialect >= SMB30_PROT_ID &&
+ (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ /* schedule query interfaces poll */
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ }
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 43ad1176dcb9..ac86bd0ebd63 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -269,7 +269,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
list_for_each_entry(t, &ce->tlist, list) {
seq_printf(m, " %s%s\n",
t->name,
- ce->tgthint == t ? " (target hint)" : "");
+ READ_ONCE(ce->tgthint) == t ? " (target hint)" : "");
}
}
}
@@ -321,7 +321,7 @@ static inline void dump_tgts(const struct cache_entry *ce)
cifs_dbg(FYI, "target list:\n");
list_for_each_entry(t, &ce->tlist, list) {
cifs_dbg(FYI, " %s%s\n", t->name,
- ce->tgthint == t ? " (target hint)" : "");
+ READ_ONCE(ce->tgthint) == t ? " (target hint)" : "");
}
}
@@ -427,7 +427,7 @@ static int cache_entry_hash(const void *data, int size, unsigned int *hash)
/* Return target hint of a DFS cache entry */
static inline char *get_tgt_name(const struct cache_entry *ce)
{
- struct cache_dfs_tgt *t = ce->tgthint;
+ struct cache_dfs_tgt *t = READ_ONCE(ce->tgthint);
return t ? t->name : ERR_PTR(-ENOENT);
}
@@ -470,6 +470,7 @@ static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed)
static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
struct cache_entry *ce, const char *tgthint)
{
+ struct cache_dfs_tgt *target;
int i;
ce->ttl = max_t(int, refs[0].ttl, CACHE_MIN_TTL);
@@ -496,8 +497,9 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
ce->numtgts++;
}
- ce->tgthint = list_first_entry_or_null(&ce->tlist,
- struct cache_dfs_tgt, list);
+ target = list_first_entry_or_null(&ce->tlist, struct cache_dfs_tgt,
+ list);
+ WRITE_ONCE(ce->tgthint, target);
return 0;
}
@@ -558,7 +560,8 @@ static void remove_oldest_entry_locked(void)
}
/* Add a new DFS cache entry */
-static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
+static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs,
+ int numrefs)
{
int rc;
struct cache_entry *ce;
@@ -573,11 +576,11 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash);
if (rc)
- return rc;
+ return ERR_PTR(rc);
ce = alloc_cache_entry(refs, numrefs);
if (IS_ERR(ce))
- return PTR_ERR(ce);
+ return ce;
spin_lock(&cache_ttl_lock);
if (!cache_ttl) {
@@ -594,7 +597,7 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
atomic_inc(&cache_count);
- return 0;
+ return ce;
}
/* Check if two DFS paths are equal. @s1 and @s2 are expected to be in @cache_cp's charset */
@@ -641,7 +644,9 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h
*
* Use whole path components in the match. Must be called with htable_rw_lock held.
*
+ * Return cached entry if successful.
* Return ERR_PTR(-ENOENT) if the entry is not found.
+ * Return error ptr otherwise.
*/
static struct cache_entry *lookup_cache_entry(const char *path)
{
@@ -711,14 +716,15 @@ void dfs_cache_destroy(void)
static int update_cache_entry_locked(struct cache_entry *ce, const struct dfs_info3_param *refs,
int numrefs)
{
+ struct cache_dfs_tgt *target;
+ char *th = NULL;
int rc;
- char *s, *th = NULL;
WARN_ON(!rwsem_is_locked(&htable_rw_lock));
- if (ce->tgthint) {
- s = ce->tgthint->name;
- th = kstrdup(s, GFP_ATOMIC);
+ target = READ_ONCE(ce->tgthint);
+ if (target) {
+ th = kstrdup(target->name, GFP_ATOMIC);
if (!th)
return -ENOMEM;
}
@@ -767,51 +773,75 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const
*
* For interlinks, cifs_mount() and expand_dfs_referral() are supposed to
* handle them properly.
+ *
+ * On success, return entry with acquired lock for reading, otherwise error ptr.
*/
-static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path)
+static struct cache_entry *cache_refresh_path(const unsigned int xid,
+ struct cifs_ses *ses,
+ const char *path,
+ bool force_refresh)
{
- int rc;
- struct cache_entry *ce;
struct dfs_info3_param *refs = NULL;
+ struct cache_entry *ce;
int numrefs = 0;
- bool newent = false;
+ int rc;
cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
- down_write(&htable_rw_lock);
+ down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
if (!IS_ERR(ce)) {
- if (!cache_entry_expired(ce)) {
- dump_ce(ce);
- up_write(&htable_rw_lock);
- return 0;
- }
- } else {
- newent = true;
+ if (!force_refresh && !cache_entry_expired(ce))
+ return ce;
+ } else if (PTR_ERR(ce) != -ENOENT) {
+ up_read(&htable_rw_lock);
+ return ce;
}
/*
- * Either the entry was not found, or it is expired.
+ * Unlock shared access as we don't want to hold any locks while getting
+ * a new referral. The @ses used for performing the I/O could be
+ * reconnecting and it acquires @htable_rw_lock to look up the dfs cache
+ * in order to failover -- if necessary.
+ */
+ up_read(&htable_rw_lock);
+
+ /*
+ * Either the entry was not found, or it is expired, or it is a forced
+ * refresh.
* Request a new DFS referral in order to create or update a cache entry.
*/
rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- if (rc)
- goto out_unlock;
+ if (rc) {
+ ce = ERR_PTR(rc);
+ goto out;
+ }
dump_refs(refs, numrefs);
- if (!newent) {
- rc = update_cache_entry_locked(ce, refs, numrefs);
- goto out_unlock;
+ down_write(&htable_rw_lock);
+ /* Re-check as another task might have it added or refreshed already */
+ ce = lookup_cache_entry(path);
+ if (!IS_ERR(ce)) {
+ if (force_refresh || cache_entry_expired(ce)) {
+ rc = update_cache_entry_locked(ce, refs, numrefs);
+ if (rc)
+ ce = ERR_PTR(rc);
+ }
+ } else if (PTR_ERR(ce) == -ENOENT) {
+ ce = add_cache_entry_locked(refs, numrefs);
}
- rc = add_cache_entry_locked(refs, numrefs);
+ if (IS_ERR(ce)) {
+ up_write(&htable_rw_lock);
+ goto out;
+ }
-out_unlock:
- up_write(&htable_rw_lock);
+ downgrade_write(&htable_rw_lock);
+out:
free_dfs_info_array(refs, numrefs);
- return rc;
+ return ce;
}
/*
@@ -878,7 +908,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
}
it->it_path_consumed = t->path_consumed;
- if (ce->tgthint == t)
+ if (READ_ONCE(ce->tgthint) == t)
list_add(&it->it_list, head);
else
list_add_tail(&it->it_list, head);
@@ -931,15 +961,8 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl
if (IS_ERR(npath))
return PTR_ERR(npath);
- rc = cache_refresh_path(xid, ses, npath);
- if (rc)
- goto out_free_path;
-
- down_read(&htable_rw_lock);
-
- ce = lookup_cache_entry(npath);
+ ce = cache_refresh_path(xid, ses, npath, false);
if (IS_ERR(ce)) {
- up_read(&htable_rw_lock);
rc = PTR_ERR(ce);
goto out_free_path;
}
@@ -1003,72 +1026,6 @@ out_unlock:
}
/**
- * dfs_cache_update_tgthint - update target hint of a DFS cache entry
- *
- * If it doesn't find the cache entry, then it will get a DFS referral for @path
- * and create a new entry.
- *
- * In case the cache entry exists but expired, it will get a DFS referral
- * for @path and then update the respective cache entry.
- *
- * @xid: syscall id
- * @ses: smb session
- * @cp: codepage
- * @remap: type of character remapping for paths
- * @path: path to lookup in DFS referral cache
- * @it: DFS target iterator
- *
- * Return zero if the target hint was updated successfully, otherwise non-zero.
- */
-int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *cp, int remap, const char *path,
- const struct dfs_cache_tgt_iterator *it)
-{
- int rc;
- const char *npath;
- struct cache_entry *ce;
- struct cache_dfs_tgt *t;
-
- npath = dfs_cache_canonical_path(path, cp, remap);
- if (IS_ERR(npath))
- return PTR_ERR(npath);
-
- cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath);
-
- rc = cache_refresh_path(xid, ses, npath);
- if (rc)
- goto out_free_path;
-
- down_write(&htable_rw_lock);
-
- ce = lookup_cache_entry(npath);
- if (IS_ERR(ce)) {
- rc = PTR_ERR(ce);
- goto out_unlock;
- }
-
- t = ce->tgthint;
-
- if (likely(!strcasecmp(it->it_name, t->name)))
- goto out_unlock;
-
- list_for_each_entry(t, &ce->tlist, list) {
- if (!strcasecmp(t->name, it->it_name)) {
- ce->tgthint = t;
- cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
- it->it_name);
- break;
- }
- }
-
-out_unlock:
- up_write(&htable_rw_lock);
-out_free_path:
- kfree(npath);
- return rc;
-}
-
-/**
* dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry
* without sending any requests to the currently connected server.
*
@@ -1092,21 +1049,20 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt
cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
- if (!down_write_trylock(&htable_rw_lock))
- return;
+ down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
if (IS_ERR(ce))
goto out_unlock;
- t = ce->tgthint;
+ t = READ_ONCE(ce->tgthint);
if (unlikely(!strcasecmp(it->it_name, t->name)))
goto out_unlock;
list_for_each_entry(t, &ce->tlist, list) {
if (!strcasecmp(t->name, it->it_name)) {
- ce->tgthint = t;
+ WRITE_ONCE(ce->tgthint, t);
cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
it->it_name);
break;
@@ -1114,7 +1070,7 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt
}
out_unlock:
- up_write(&htable_rw_lock);
+ up_read(&htable_rw_lock);
}
/**
@@ -1299,7 +1255,6 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c
* Resolve share's hostname and check if server address matches. Otherwise just ignore it
* as we could not have upcall to resolve hostname or failed to convert ip address.
*/
- match = true;
extract_unc_hostname(s1, &host, &hostlen);
scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
@@ -1321,35 +1276,37 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c
* Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
* target shares in @refs.
*/
-static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
- const struct dfs_info3_param *refs, int numrefs)
+static void mark_for_reconnect_if_needed(struct TCP_Server_Info *server,
+ struct dfs_cache_tgt_list *old_tl,
+ struct dfs_cache_tgt_list *new_tl)
{
- struct dfs_cache_tgt_iterator *it;
- int i;
-
- for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
- for (i = 0; i < numrefs; i++) {
- if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
- refs[i].node_name))
+ struct dfs_cache_tgt_iterator *oit, *nit;
+
+ for (oit = dfs_cache_get_tgt_iterator(old_tl); oit;
+ oit = dfs_cache_get_next_tgt(old_tl, oit)) {
+ for (nit = dfs_cache_get_tgt_iterator(new_tl); nit;
+ nit = dfs_cache_get_next_tgt(new_tl, nit)) {
+ if (target_share_equal(server,
+ dfs_cache_get_tgt_name(oit),
+ dfs_cache_get_tgt_name(nit)))
return;
}
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_signal_cifsd_for_reconnect(tcon->ses->server, true);
+ cifs_signal_cifsd_for_reconnect(server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_refresh)
{
- struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+ struct dfs_cache_tgt_list old_tl = DFS_CACHE_TGT_LIST_INIT(old_tl);
+ struct dfs_cache_tgt_list new_tl = DFS_CACHE_TGT_LIST_INIT(new_tl);
struct cifs_ses *ses = CIFS_DFS_ROOT_SES(tcon->ses);
struct cifs_tcon *ipc = ses->tcon_ipc;
- struct dfs_info3_param *refs = NULL;
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
- int numrefs = 0;
int rc = 0;
xid = get_xid();
@@ -1358,9 +1315,8 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
if (!IS_ERR(ce)) {
- rc = get_targets(ce, &tl);
- if (rc)
- cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+ rc = get_targets(ce, &old_tl);
+ cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc);
}
up_read(&htable_rw_lock);
@@ -1377,26 +1333,18 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r
}
spin_unlock(&ipc->tc_lock);
- rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- if (!rc) {
- /* Create or update a cache entry with the new referral */
- dump_refs(refs, numrefs);
-
- down_write(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- if (IS_ERR(ce))
- add_cache_entry_locked(refs, numrefs);
- else if (force_refresh || cache_entry_expired(ce))
- update_cache_entry_locked(ce, refs, numrefs);
- up_write(&htable_rw_lock);
-
- mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+ ce = cache_refresh_path(xid, ses, path, true);
+ if (!IS_ERR(ce)) {
+ rc = get_targets(ce, &new_tl);
+ up_read(&htable_rw_lock);
+ cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc);
+ mark_for_reconnect_if_needed(tcon->ses->server, &old_tl, &new_tl);
}
out:
free_xid(xid);
- dfs_cache_free_tgts(&tl);
- free_dfs_info_array(refs, numrefs);
+ dfs_cache_free_tgts(&old_tl);
+ dfs_cache_free_tgts(&new_tl);
return rc;
}
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index f7cff0be9327..be3b5a44cf82 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -35,9 +35,6 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl
struct dfs_cache_tgt_list *tgt_list);
int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
struct dfs_cache_tgt_list *tgt_list);
-int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *cp, int remap, const char *path,
- const struct dfs_cache_tgt_iterator *it);
void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it);
int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it,
struct dfs_info3_param *ref);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1d9cc59d8259..2870e3b6ffe8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3890,7 +3890,7 @@ uncached_fill_pages(struct TCP_Server_Info *server,
rdata->got_bytes += result;
}
- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
rdata->got_bytes : result;
}
@@ -4666,7 +4666,7 @@ readpages_fill_pages(struct TCP_Server_Info *server,
rdata->got_bytes += result;
}
- return rdata->got_bytes > 0 && result != -ECONNABORTED ?
+ return result != -ECONNABORTED && rdata->got_bytes > 0 ?
rdata->got_bytes : result;
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index bd374feeccaa..a5a097a69983 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.disposition = FILE_CREATE;
oparms.fid = &fid;
oparms.reconnect = false;
+ oparms.mode = 0644;
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
NULL, NULL);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 0b842a07e157..c47b254f0d1e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -815,6 +815,7 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
if (tilen) {
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1428,6 +1429,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
goto out_put_spnego_key;
}
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 50480751e521..4cb364454e13 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls,
cifs_remap(cifs_sb));
- if (!rc)
- move_cifs_info_to_smb2(&data->fi, &fi);
*adjustTZ = true;
}
- if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) {
+ if (!rc) {
int tmprc;
int oplock = 0;
struct cifs_fid fid;
struct cifs_open_parms oparms;
+ move_cifs_info_to_smb2(&data->fi, &fi);
+
+ if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE))
+ return 0;
+
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -716,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
void *buf)
{
- FILE_ALL_INFO *fi = buf;
+ struct cifs_open_info_data *data = buf;
+ FILE_ALL_INFO fi = {};
+ int rc;
if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS))
- return SMBLegacyOpen(xid, oparms->tcon, oparms->path,
- oparms->disposition,
- oparms->desired_access,
- oparms->create_options,
- &oparms->fid->netfid, oplock, fi,
- oparms->cifs_sb->local_nls,
- cifs_remap(oparms->cifs_sb));
- return CIFS_open(xid, oparms, oplock, fi);
+ rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path,
+ oparms->disposition,
+ oparms->desired_access,
+ oparms->create_options,
+ &oparms->fid->netfid, oplock, &fi,
+ oparms->cifs_sb->local_nls,
+ cifs_remap(oparms->cifs_sb));
+ else
+ rc = CIFS_open(xid, oparms, oplock, &fi);
+
+ if (!rc && data)
+ move_cifs_info_to_smb2(&data->fi, &fi);
+
+ return rc;
}
static void
@@ -1050,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct inode *newinode = NULL;
int rc = -EPERM;
- FILE_ALL_INFO *buf = NULL;
+ struct cifs_open_info_data buf = {};
struct cifs_io_parms io_parms;
__u32 oplock = 0;
struct cifs_fid fid;
@@ -1082,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode,
cifs_sb->local_nls,
cifs_remap(cifs_sb));
if (rc)
- goto out;
+ return rc;
rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
if (rc == 0)
d_instantiate(dentry, newinode);
- goto out;
+ return rc;
}
/*
@@ -1097,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode,
* support block and char device (no socket & fifo)
*/
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
- goto out;
+ return rc;
if (!S_ISCHR(mode) && !S_ISBLK(mode))
- goto out;
+ return rc;
cifs_dbg(FYI, "sfu compat create special file\n");
- buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
- if (buf == NULL) {
- rc = -ENOMEM;
- goto out;
- }
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
@@ -1124,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode,
oplock = REQ_OPLOCK;
else
oplock = 0;
- rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf);
+ rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
if (rc)
- goto out;
+ return rc;
/*
* BB Do not bother to decode buf since no local inode yet to put
* timestamps in, but we can reuse it safely.
*/
- pdev = (struct win_dev *)buf;
+ pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
io_parms.offset = 0;
io_parms.length = sizeof(struct win_dev);
- iov[1].iov_base = buf;
+ iov[1].iov_base = &buf.fi;
iov[1].iov_len = sizeof(struct win_dev);
if (S_ISCHR(mode)) {
memcpy(pdev->type, "IntxCHR", 8);
@@ -1157,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode,
d_drop(dentry);
/* FIXME: add code here to set EAs */
-out:
- kfree(buf);
+
+ cifs_free_open_info(&buf);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2c484d47c592..2c9ffa921e6f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1453,6 +1453,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
/* keep session key if binding */
if (!is_binding) {
+ kfree_sensitive(ses->auth_key.response);
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1482,8 +1483,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
out_put_spnego_key:
key_invalidate(spnego_key);
key_put(spnego_key);
- if (rc)
+ if (rc) {
kfree_sensitive(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ }
out:
sess_data->result = rc;
sess_data->func = NULL;
@@ -4159,12 +4163,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
(struct smb2_hdr *)rdata->iov[0].iov_base;
struct cifs_credits credits = { .value = 0, .instance = 0 };
struct smb_rqst rqst = { .rq_iov = &rdata->iov[1],
- .rq_nvec = 1,
- .rq_pages = rdata->pages,
- .rq_offset = rdata->page_offset,
- .rq_npages = rdata->nr_pages,
- .rq_pagesz = rdata->pagesz,
- .rq_tailsz = rdata->tailsz };
+ .rq_nvec = 1, };
+
+ if (rdata->got_bytes) {
+ rqst.rq_pages = rdata->pages;
+ rqst.rq_offset = rdata->page_offset;
+ rqst.rq_npages = rdata->nr_pages;
+ rqst.rq_pagesz = rdata->pagesz;
+ rqst.rq_tailsz = rdata->tailsz;
+ }
WARN_ONCE(rdata->server != mid->server,
"rdata server %p != mid server %p",
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 90789aaa6567..8c816b25ce7c 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
destroy_workqueue(info->workqueue);
log_rdma_event(INFO, "rdma session destroyed\n");
kfree(info);
+ server->smbd_conn = NULL;
}
/*
diff --git a/fs/coredump.c b/fs/coredump.c
index de78bde2991b..a25ecec9ca7c 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -838,6 +838,30 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
}
}
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+{
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ return __dump_emit(cprm, addr, nr);
+}
+EXPORT_SYMBOL(dump_emit);
+
+void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
+{
+ cprm->to_skip = pos - cprm->pos;
+}
+EXPORT_SYMBOL(dump_skip_to);
+
+void dump_skip(struct coredump_params *cprm, size_t nr)
+{
+ cprm->to_skip += nr;
+}
+EXPORT_SYMBOL(dump_skip);
+
+#ifdef CONFIG_ELF_CORE
static int dump_emit_page(struct coredump_params *cprm, struct page *page)
{
struct bio_vec bvec = {
@@ -871,30 +895,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 1;
}
-int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
-{
- if (cprm->to_skip) {
- if (!__dump_skip(cprm, cprm->to_skip))
- return 0;
- cprm->to_skip = 0;
- }
- return __dump_emit(cprm, addr, nr);
-}
-EXPORT_SYMBOL(dump_emit);
-
-void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
-{
- cprm->to_skip = pos - cprm->pos;
-}
-EXPORT_SYMBOL(dump_skip_to);
-
-void dump_skip(struct coredump_params *cprm, size_t nr)
-{
- cprm->to_skip += nr;
-}
-EXPORT_SYMBOL(dump_skip);
-
-#ifdef CONFIG_ELF_CORE
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len)
{
diff --git a/fs/dax.c b/fs/dax.c
index c48a3a93ab29..3e457a16c7d1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
if (ret < 0)
goto out_unlock;
- ret = copy_mc_to_kernel(daddr, saddr, length);
- if (ret)
+ if (copy_mc_to_kernel(daddr, saddr, length) == 0)
+ ret = length;
+ else
ret = -EIO;
out_unlock:
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 481788c24a68..626a615dafc2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -577,26 +577,25 @@ static int erofs_fc_parse_param(struct fs_context *fc,
}
++ctx->devs->extra_devices;
break;
- case Opt_fsid:
#ifdef CONFIG_EROFS_FS_ONDEMAND
+ case Opt_fsid:
kfree(ctx->fsid);
ctx->fsid = kstrdup(param->string, GFP_KERNEL);
if (!ctx->fsid)
return -ENOMEM;
-#else
- errorfc(fc, "fsid option not supported");
-#endif
break;
case Opt_domain_id:
-#ifdef CONFIG_EROFS_FS_ONDEMAND
kfree(ctx->domain_id);
ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
if (!ctx->domain_id)
return -ENOMEM;
+ break;
#else
- errorfc(fc, "domain_id option not supported");
-#endif
+ case Opt_fsid:
+ case Opt_domain_id:
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
break;
+#endif
default:
return -ENOPARAM;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index ccf7c55d477f..5200bb86e264 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1032,12 +1032,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
if (!be->decompressed_pages)
be->decompressed_pages =
- kvcalloc(be->nr_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kcalloc(be->nr_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
if (!be->compressed_pages)
be->compressed_pages =
- kvcalloc(pclusterpages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kcalloc(pclusterpages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
z_erofs_parse_out_bvecs(be);
err2 = z_erofs_parse_in_bvecs(be, &overlapped);
@@ -1085,7 +1085,7 @@ out:
}
if (be->compressed_pages < be->onstack_pages ||
be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
- kvfree(be->compressed_pages);
+ kfree(be->compressed_pages);
z_erofs_fill_other_copies(be, err);
for (i = 0; i < be->nr_pages; ++i) {
@@ -1104,7 +1104,7 @@ out:
}
if (be->decompressed_pages != be->onstack_pages)
- kvfree(be->decompressed_pages);
+ kfree(be->decompressed_pages);
pcl->length = 0;
pcl->partial = true;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 0150570c33aa..98fb90b9af71 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -793,12 +793,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
/*
- * No strict rule how to describe extents for post EOF, yet
- * we need do like below. Otherwise, iomap itself will get
+ * No strict rule on how to describe extents for post EOF, yet
+ * we need to do like below. Otherwise, iomap itself will get
* into an endless loop on post EOF.
+ *
+ * Calculate the effective offset by subtracting extent start
+ * (map.m_la) from the requested offset, and add it to length.
+ * (NB: offset >= map.m_la always)
*/
if (iomap->offset >= inode->i_size)
- iomap->length = length + map.m_la - offset;
+ iomap->length = length + offset - map.m_la;
}
iomap->flags = 0;
return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7decaaf27e82..a2f04a3808db 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -81,6 +81,8 @@ ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
struct mb_cache_entry **);
static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
size_t value_count);
+static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value,
+ size_t value_count);
static void ext4_xattr_rehash(struct ext4_xattr_header *);
static const struct xattr_handler * const ext4_xattr_handler_map[] = {
@@ -470,8 +472,22 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
tmp_data = cpu_to_le32(hash);
e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
&tmp_data, 1);
+ /* All good? */
+ if (e_hash == entry->e_hash)
+ return 0;
+
+ /*
+ * Not good. Maybe the entry hash was calculated
+ * using the buggy signed char version?
+ */
+ e_hash = ext4_xattr_hash_entry_signed(entry->e_name, entry->e_name_len,
+ &tmp_data, 1);
+ /* Still no match - bad */
if (e_hash != entry->e_hash)
return -EFSCORRUPTED;
+
+ /* Let people know about old hash */
+ pr_warn_once("ext4: filesystem with signed xattr name hash");
}
return 0;
}
@@ -3081,7 +3097,29 @@ static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
while (name_len--) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
- *name++;
+ (unsigned char)*name++;
+ }
+ while (value_count--) {
+ hash = (hash << VALUE_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+ le32_to_cpu(*value++);
+ }
+ return cpu_to_le32(hash);
+}
+
+/*
+ * ext4_xattr_hash_entry_signed()
+ *
+ * Compute the hash of an extended attribute incorrectly.
+ */
+static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, size_t value_count)
+{
+ __u32 hash = 0;
+
+ while (name_len--) {
+ hash = (hash << NAME_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
+ (signed char)*name++;
}
while (value_count--) {
hash = (hash << VALUE_HASH_SHIFT) ^
diff --git a/fs/freevxfs/Kconfig b/fs/freevxfs/Kconfig
index c05c71d57291..0e2fc08f7de4 100644
--- a/fs/freevxfs/Kconfig
+++ b/fs/freevxfs/Kconfig
@@ -8,7 +8,7 @@ config VXFS_FS
of SCO UnixWare (and possibly others) and optionally available
for Sunsoft Solaris, HP-UX and many other operating systems. However
these particular OS implementations of vxfs may differ in on-disk
- data endianess and/or superblock offset. The vxfs module has been
+ data endianness and/or superblock offset. The vxfs module has been
tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.)
Currently only readonly access is supported and VxFX versions
2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index ab8ceddf9efa..cdf991bdd9de 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -141,13 +141,14 @@ static bool fscache_is_acquire_pending(struct fscache_volume *volume)
static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
- wait_var_event_timeout(&candidate->flags,
- !fscache_is_acquire_pending(candidate), 20 * HZ);
+ wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
+ TASK_UNINTERRUPTIBLE, 20 * HZ);
if (fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
+ wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
+ TASK_UNINTERRUPTIBLE);
}
}
@@ -279,8 +280,7 @@ static void fscache_create_volume_work(struct work_struct *work)
fscache_end_cache_access(volume->cache,
fscache_access_acquire_volume_end);
- clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
- wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
+ clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
fscache_put_volume(volume, fscache_volume_put_create_work);
}
@@ -347,8 +347,8 @@ static void fscache_wake_pending_volume(struct fscache_volume *volume,
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
if (fscache_volume_same(cursor, volume)) {
fscache_see_volume(cursor, fscache_volume_see_hash_wake);
- clear_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &cursor->flags);
- wake_up_bit(&cursor->flags, FSCACHE_VOLUME_ACQUIRE_PENDING);
+ clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING,
+ &cursor->flags);
return;
}
}
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index a4850aee2639..ad670369955f 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -11,9 +11,10 @@
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
-struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
+static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc,
+ struct user_namespace *mnt_userns,
+ struct inode *inode, int type, bool rcu)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
int size;
const char *name;
void *value = NULL;
@@ -25,7 +26,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
if (fuse_is_bad(inode))
return ERR_PTR(-EIO);
- if (!fc->posix_acl || fc->no_getxattr)
+ if (fc->no_getxattr)
return NULL;
if (type == ACL_TYPE_ACCESS)
@@ -53,6 +54,46 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu)
return acl;
}
+static inline bool fuse_no_acl(const struct fuse_conn *fc,
+ const struct inode *inode)
+{
+ /*
+ * Refuse interacting with POSIX ACLs for daemons that
+ * don't support FUSE_POSIX_ACL and are not mounted on
+ * the host to retain backwards compatibility.
+ */
+ return !fc->posix_acl && (i_user_ns(inode) != &init_user_ns);
+}
+
+struct posix_acl *fuse_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, int type)
+{
+ struct inode *inode = d_inode(dentry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (fuse_no_acl(fc, inode))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return __fuse_get_acl(fc, mnt_userns, inode, type, false);
+}
+
+struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ /*
+ * FUSE daemons before FUSE_POSIX_ACL was introduced could get and set
+ * POSIX ACLs without them being used for permission checking by the
+ * vfs. Retain that behavior for backwards compatibility as there are
+ * filesystems that do all permission checking for acls in the daemon
+ * and not in the kernel.
+ */
+ if (!fc->posix_acl)
+ return NULL;
+
+ return __fuse_get_acl(fc, &init_user_ns, inode, type, rcu);
+}
+
int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
struct posix_acl *acl, int type)
{
@@ -64,7 +105,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
if (fuse_is_bad(inode))
return -EIO;
- if (!fc->posix_acl || fc->no_setxattr)
+ if (fc->no_setxattr || fuse_no_acl(fc, inode))
return -EOPNOTSUPP;
if (type == ACL_TYPE_ACCESS)
@@ -99,7 +140,13 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
return ret;
}
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) &&
+ /*
+ * Fuse daemons without FUSE_POSIX_ACL never changed the passed
+ * through POSIX ACLs. Such daemons don't expect setgid bits to
+ * be stripped.
+ */
+ if (fc->posix_acl &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) &&
!capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
@@ -108,8 +155,15 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
} else {
ret = fuse_removexattr(inode, name);
}
- forget_all_cached_acls(inode);
- fuse_invalidate_attr(inode);
+
+ if (fc->posix_acl) {
+ /*
+ * Fuse daemons without FUSE_POSIX_ACL never cached POSIX ACLs
+ * and didn't invalidate attributes. Retain that behavior.
+ */
+ forget_all_cached_acls(inode);
+ fuse_invalidate_attr(inode);
+ }
return ret;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index cd1a071b625a..2725fb54328e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1942,7 +1942,8 @@ static const struct inode_operations fuse_dir_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
- .get_inode_acl = fuse_get_acl,
+ .get_inode_acl = fuse_get_inode_acl,
+ .get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
.fileattr_get = fuse_fileattr_get,
.fileattr_set = fuse_fileattr_set,
@@ -1964,7 +1965,8 @@ static const struct inode_operations fuse_common_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
- .get_inode_acl = fuse_get_acl,
+ .get_inode_acl = fuse_get_inode_acl,
+ .get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
.fileattr_get = fuse_fileattr_get,
.fileattr_set = fuse_fileattr_set,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c673faefdcb9..46797a171a84 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1264,11 +1264,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
int fuse_removexattr(struct inode *inode, const char *name);
extern const struct xattr_handler *fuse_xattr_handlers[];
-extern const struct xattr_handler *fuse_acl_xattr_handlers[];
-extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
struct posix_acl;
-struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu);
+struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu);
+struct posix_acl *fuse_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, int type);
int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
struct posix_acl *acl, int type);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6b3beda16c1b..de9b9ec5ce81 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -311,7 +311,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
-static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_conn *fc)
{
inode->i_mode = attr->mode & S_IFMT;
inode->i_size = attr->size;
@@ -333,6 +334,12 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
new_decode_dev(attr->rdev));
} else
BUG();
+ /*
+ * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
+ * so they see the exact same behavior as before.
+ */
+ if (!fc->posix_acl)
+ inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
}
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
@@ -372,7 +379,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
if (!inode)
return NULL;
- fuse_init_inode(inode, attr);
+ fuse_init_inode(inode, attr, fc);
get_fuse_inode(inode)->nodeid = nodeid;
inode->i_flags |= S_AUTOMOUNT;
goto done;
@@ -388,7 +395,7 @@ retry:
if (!fc->writeback_cache || !S_ISREG(attr->mode))
inode->i_flags |= S_NOCMTIME;
inode->i_generation = generation;
- fuse_init_inode(inode, attr);
+ fuse_init_inode(inode, attr, fc);
unlock_new_inode(inode);
} else if (fuse_stale_inode(inode, generation, attr)) {
/* nodeid was reused, any I/O on the old inode should fail */
@@ -1174,7 +1181,6 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
if ((flags & FUSE_POSIX_ACL)) {
fc->default_permissions = 1;
fc->posix_acl = 1;
- fm->sb->s_xattr = fuse_acl_xattr_handlers;
}
if (flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1;
@@ -1420,13 +1426,6 @@ static void fuse_sb_defaults(struct super_block *sb)
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
-
- /*
- * If we are not in the initial user namespace posix
- * acls must be translated.
- */
- if (sb->s_user_ns != &init_user_ns)
- sb->s_xattr = fuse_no_acl_xattr_handlers;
}
static int fuse_fill_super_submount(struct super_block *sb,
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 0d3e7177fce0..9fe571ab569e 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -203,27 +203,6 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
return fuse_setxattr(inode, name, value, size, flags, 0);
}
-static bool no_xattr_list(struct dentry *dentry)
-{
- return false;
-}
-
-static int no_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size)
-{
- return -EOPNOTSUPP;
-}
-
-static int no_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
- struct dentry *dentry, struct inode *nodee,
- const char *name, const void *value,
- size_t size, int flags)
-{
- return -EOPNOTSUPP;
-}
-
static const struct xattr_handler fuse_xattr_handler = {
.prefix = "",
.get = fuse_xattr_get,
@@ -234,33 +213,3 @@ const struct xattr_handler *fuse_xattr_handlers[] = {
&fuse_xattr_handler,
NULL
};
-
-const struct xattr_handler *fuse_acl_xattr_handlers[] = {
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
- &fuse_xattr_handler,
- NULL
-};
-
-static const struct xattr_handler fuse_no_acl_access_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = no_xattr_list,
- .get = no_xattr_get,
- .set = no_xattr_set,
-};
-
-static const struct xattr_handler fuse_no_acl_default_xattr_handler = {
- .name = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = ACL_TYPE_ACCESS,
- .list = no_xattr_list,
- .get = no_xattr_get,
- .set = no_xattr_set,
-};
-
-const struct xattr_handler *fuse_no_acl_xattr_handlers[] = {
- &fuse_no_acl_access_xattr_handler,
- &fuse_no_acl_default_xattr_handler,
- &fuse_xattr_handler,
- NULL
-};
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 723639376ae2..61323deb80bc 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -80,6 +80,15 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
brelse(bd->bd_bh);
}
+static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct address_space *mapping = data;
+ int ret = mapping->a_ops->writepage(page, wbc);
+ mapping_set_error(mapping, ret);
+ return ret;
+}
+
/**
* gfs2_ail1_start_one - Start I/O on a transaction
* @sdp: The superblock
@@ -131,7 +140,7 @@ __acquires(&sdp->sd_ail_lock)
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- ret = filemap_fdatawrite_wbc(mapping, wbc);
+ ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping);
if (need_resched()) {
blk_finish_plug(plug);
cond_resched();
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 2a39ffb8423b..6e61b5bc7d86 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
dn_off = le32_to_cpu(authblob->DomainName.BufferOffset);
dn_len = le16_to_cpu(authblob->DomainName.Length);
- if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len)
+ if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len ||
+ nt_len < CIFS_ENCPWD_SIZE)
return -EINVAL;
/* TODO : use domain name that imported from configuration file */
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 12be8386446a..56be077e5d8a 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -280,7 +280,7 @@ int ksmbd_conn_handler_loop(void *p)
{
struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
struct ksmbd_transport *t = conn->transport;
- unsigned int pdu_size;
+ unsigned int pdu_size, max_allowed_pdu_size;
char hdr_buf[4] = {0,};
int size;
@@ -305,20 +305,36 @@ int ksmbd_conn_handler_loop(void *p)
pdu_size = get_rfc1002_len(hdr_buf);
ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
+ if (conn->status == KSMBD_SESS_GOOD)
+ max_allowed_pdu_size =
+ SMB3_MAX_MSGSIZE + conn->vals->max_write_size;
+ else
+ max_allowed_pdu_size = SMB3_MAX_MSGSIZE;
+
+ if (pdu_size > max_allowed_pdu_size) {
+ pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n",
+ pdu_size, max_allowed_pdu_size,
+ conn->status);
+ break;
+ }
+
/*
* Check if pdu size is valid (min : smb header size,
* max : 0x00FFFFFF).
*/
if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE ||
pdu_size > MAX_STREAM_PROT_LEN) {
- continue;
+ break;
}
/* 4 for rfc1002 length field */
size = pdu_size + 4;
- conn->request_buf = kvmalloc(size, GFP_KERNEL);
+ conn->request_buf = kvmalloc(size,
+ GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
if (!conn->request_buf)
- continue;
+ break;
memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
if (!ksmbd_smb_request(conn))
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index b6bd8311e6b4..fb8b2d566efb 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -106,7 +106,8 @@ struct ksmbd_startup_request {
__u32 sub_auth[3]; /* Subauth value for Security ID */
__u32 smb2_max_credits; /* MAX credits */
__u32 smbd_max_io_size; /* smbd read write size */
- __u32 reserved[127]; /* Reserved room */
+ __u32 max_connections; /* Number of maximum simultaneous connections */
+ __u32 reserved[126]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
};
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
index 0ae8d08d85a8..4d9e0b54e3db 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/ksmbd/ndr.c
@@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
return ret;
if (da->version != 3 && da->version != 4) {
- pr_err("v%d version is not supported\n", da->version);
+ ksmbd_debug(VFS, "v%d version is not supported\n", da->version);
return -EINVAL;
}
@@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
return ret;
if (da->version != version2) {
- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
da->version, version2);
return -EINVAL;
}
@@ -457,7 +457,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
if (ret)
return ret;
if (acl->version != 4) {
- pr_err("v%d version is not supported\n", acl->version);
+ ksmbd_debug(VFS, "v%d version is not supported\n", acl->version);
return -EINVAL;
}
@@ -465,7 +465,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
if (ret)
return ret;
if (acl->version != version2) {
- pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n",
acl->version, version2);
return -EINVAL;
}
diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h
index ac9d932f8c8a..db7278181760 100644
--- a/fs/ksmbd/server.h
+++ b/fs/ksmbd/server.h
@@ -41,6 +41,7 @@ struct ksmbd_server_config {
unsigned int share_fake_fscaps;
struct smb_sid domain_sid;
unsigned int auth_mechs;
+ unsigned int max_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
};
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 000a6648f122..90eb05ca4a3c 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -1929,13 +1929,13 @@ int smb2_tree_connect(struct ksmbd_work *work)
if (conn->posix_ext_supported)
status.tree_conn->posix_extensions = true;
-out_err1:
rsp->StructureSize = cpu_to_le16(16);
+ inc_rfc1001_len(work->response_buf, 16);
+out_err1:
rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
- inc_rfc1001_len(work->response_buf, 16);
if (!IS_ERR(treename))
kfree(treename);
@@ -1968,6 +1968,9 @@ out_err1:
rsp->hdr.Status = STATUS_ACCESS_DENIED;
}
+ if (status.ret != KSMBD_TREE_CONN_STATUS_OK)
+ smb2_set_err_rsp(work);
+
return rc;
}
@@ -8661,6 +8664,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
struct smb2_hdr *rsp = smb2_get_msg(work->response_buf);
if (conn->dialect < SMB30_PROT_ID)
@@ -8670,6 +8674,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
rsp = ksmbd_resp_buf_next(work);
if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE &&
+ sess->user && !user_guest(sess->user) &&
rsp->Status == STATUS_SUCCESS)
return true;
return false;
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index aa5dbe54f5a1..0c8a770fe318 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -24,8 +24,9 @@
#define SMB21_DEFAULT_IOSIZE (1024 * 1024)
#define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024)
-#define SMB3_MIN_IOSIZE (64 * 1024)
-#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
+#define SMB3_MIN_IOSIZE (64 * 1024)
+#define SMB3_MAX_IOSIZE (8 * 1024 * 1024)
+#define SMB3_MAX_MSGSIZE (4 * 4096)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index c9aca21637d5..40c721f9227e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -308,6 +308,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
if (req->smbd_max_io_size)
init_smbd_max_io_size(req->smbd_max_io_size);
+ if (req->max_connections)
+ server_conf.max_connections = req->max_connections;
+
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
ret |= ksmbd_set_work_group(req->work_group);
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index 63d55f543bd2..603893fd87f5 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -15,6 +15,8 @@
#define IFACE_STATE_DOWN BIT(0)
#define IFACE_STATE_CONFIGURED BIT(1)
+static atomic_t active_num_conn;
+
struct interface {
struct task_struct *ksmbd_kthread;
struct socket *ksmbd_socket;
@@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk)
struct tcp_transport *t;
t = alloc_transport(client_sk);
- if (!t)
+ if (!t) {
+ sock_release(client_sk);
return -ENOMEM;
+ }
csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
if (kernel_getpeername(client_sk, csin) < 0) {
@@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p)
continue;
}
+ if (server_conf.max_connections &&
+ atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
+ pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
+ atomic_read(&active_num_conn));
+ atomic_dec(&active_num_conn);
+ sock_release(client_sk);
+ continue;
+ }
+
ksmbd_debug(CONN, "connect success: accepted new connection\n");
client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
@@ -295,6 +308,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
struct msghdr ksmbd_msg;
struct kvec *iov;
struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;
+ int max_retry = 2;
iov = get_conn_iovec(t, nr_segs);
if (!iov)
@@ -321,9 +335,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
} else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
total_read = -EAGAIN;
break;
- } else if (length == -ERESTARTSYS || length == -EAGAIN) {
+ } else if ((length == -ERESTARTSYS || length == -EAGAIN) &&
+ max_retry) {
usleep_range(1000, 2000);
length = 0;
+ max_retry--;
continue;
} else if (length <= 0) {
total_read = -EAGAIN;
@@ -365,6 +381,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
static void ksmbd_tcp_disconnect(struct ksmbd_transport *t)
{
free_transport(TCP_TRANS(t));
+ if (server_conf.max_connections)
+ atomic_dec(&active_num_conn);
}
static void tcp_destroy_socket(struct socket *ksmbd_socket)
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 1ead5bd740c2..14a72224b657 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT
config NFS_V4_2_READ_PLUS
bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
depends on NFS_V4_2
- default y
+ default n
help
- Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS
- attempts to improve read performance by compressing out sparse holes
- in the file contents.
+ This is intended for developers only. The READ_PLUS operation has
+ been shown to have issues under specific conditions and should not
+ be used in production.
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 45b2c9e3f636..c0950edb26b0 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -662,6 +662,39 @@ static struct shrinker nfsd_file_shrinker = {
};
/**
+ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
+ * @nf: nfsd_file to attempt to queue
+ * @dispose: private list to queue successfully-put objects
+ *
+ * Unhash an nfsd_file, try to get a reference to it, and then put that
+ * reference. If it's the last reference, queue it to the dispose list.
+ */
+static void
+nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ __must_hold(RCU)
+{
+ int decrement = 1;
+
+ /* If we raced with someone else unhashing, ignore it */
+ if (!nfsd_file_unhash(nf))
+ return;
+
+ /* If we can't get a reference, ignore it */
+ if (!nfsd_file_get(nf))
+ return;
+
+ /* Extra decrement if we remove from the LRU */
+ if (nfsd_file_lru_remove(nf))
+ ++decrement;
+
+ /* If refcount goes to 0, then put on the dispose list */
+ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+ list_add(&nf->nf_lru, dispose);
+ trace_nfsd_file_closing(nf);
+ }
+}
+
+/**
* nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
* @inode: inode on which to close out nfsd_files
* @dispose: list on which to gather nfsd_files to close out
@@ -688,30 +721,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
rcu_read_lock();
do {
- int decrement = 1;
-
nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
nfsd_file_rhash_params);
if (!nf)
break;
-
- /* If we raced with someone else unhashing, ignore it */
- if (!nfsd_file_unhash(nf))
- continue;
-
- /* If we can't get a reference, ignore it */
- if (!nfsd_file_get(nf))
- continue;
-
- /* Extra decrement if we remove from the LRU */
- if (nfsd_file_lru_remove(nf))
- ++decrement;
-
- /* If refcount goes to 0, then put on the dispose list */
- if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
- list_add(&nf->nf_lru, dispose);
- trace_nfsd_file_closing(nf);
- }
+ nfsd_file_cond_queue(nf, dispose);
} while (1);
rcu_read_unlock();
}
@@ -928,11 +942,8 @@ __nfsd_file_cache_purge(struct net *net)
nf = rhashtable_walk_next(&iter);
while (!IS_ERR_OR_NULL(nf)) {
- if (!net || nf->nf_net == net) {
- nfsd_file_unhash(nf);
- nfsd_file_lru_remove(nf);
- list_add(&nf->nf_lru, &dispose);
- }
+ if (!net || nf->nf_net == net)
+ nfsd_file_cond_queue(nf, &dispose);
nf = rhashtable_walk_next(&iter);
}
@@ -1071,8 +1082,8 @@ nfsd_file_is_cached(struct inode *inode)
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf,
- bool open, bool want_gc)
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
{
struct nfsd_file_lookup_key key = {
.type = NFSD_FILE_KEY_FULL,
@@ -1147,8 +1158,7 @@ wait_for_construction:
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
- if (open)
- this_cpu_inc(nfsd_file_acquisitions);
+ this_cpu_inc(nfsd_file_acquisitions);
*pnf = nf;
} else {
if (refcount_dec_and_test(&nf->nf_ref))
@@ -1158,20 +1168,23 @@ out:
out_status:
put_cred(key.cred);
- if (open)
- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
return status;
open_file:
trace_nfsd_file_alloc(nf);
nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
if (nf->nf_mark) {
- if (open) {
+ if (file) {
+ get_file(file);
+ nf->nf_file = file;
+ status = nfs_ok;
+ trace_nfsd_file_opened(nf, status);
+ } else {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
- } else
- status = nfs_ok;
+ }
} else
status = nfserr_jukebox;
/*
@@ -1207,7 +1220,7 @@ __be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
}
/**
@@ -1228,28 +1241,30 @@ __be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
}
/**
- * nfsd_file_create - Get a struct nfsd_file, do not open
+ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
+ * @file: cached, already-open file (may be NULL)
* @pnf: OUT: new or found "struct nfsd_file" object
*
- * The nfsd_file_object returned by this API is reference-counted
- * but not garbage-collected. The object is released immediately
- * one RCU grace period after the final nfsd_file_put().
+ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
-nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf)
+nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
}
/*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index b7efb2c3ddb1..41516a4263ea 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
-__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **nfp);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index bc139401927d..ec49b200b797 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,7 +196,7 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bd880d55f565..f189ba7995f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
@@ -1318,6 +1318,7 @@ try_again:
/* allow 20secs for mount/unmount for now - revisit */
if (signal_pending(current) ||
(schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
return nfserr_eagain;
}
@@ -2607,12 +2608,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
cstate->minorversion = args->minorversion;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
-
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -2734,7 +2734,7 @@ encode_op:
out:
cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b989c72e54e4..c1684da6c01f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
if (!count)
count = atomic_long_read(&num_delegations);
if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
return (unsigned long)count;
}
@@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
-int
+void
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
@@ -4443,16 +4443,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
-}
-
-void
-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
-{
- unregister_shrinker(&nn->nfsd_client_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -5262,18 +5252,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- if (!open->op_filp) {
- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- } else {
- status = nfsd_file_create(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- nf->nf_file = open->op_filp;
- open->op_filp = NULL;
- trace_nfsd_file_create(rqstp, access, nf);
- }
+ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
+ open->op_filp, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
@@ -6243,8 +6225,7 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);
courtesy_client_reaper(nn);
@@ -8074,11 +8055,20 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+
+ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
+ goto err_shrinker;
return 0;
+err_shrinker:
+ put_net(net);
+ kfree(nn->sessionid_hashtbl);
err_sessionid:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
@@ -8171,6 +8161,8 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -8190,7 +8182,6 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
- rhltable_destroy(&nfs4_file_rhltable);
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
nfsd4_ssc_shutdown_umount(nn);
#endif
@@ -8200,6 +8191,7 @@ void
nfs4_state_shutdown(void)
{
nfsd4_destroy_callback_queue();
+ rhltable_destroy(&nfs4_file_rhltable);
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ebb4d02a42ce..97edb32be77f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
return true;
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d1e581a60480..c2577ee7ffb2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
- retval = nfsd4_init_leases_net(nn);
- if (retval)
- goto out_drc_error;
+ nfsd4_init_leases_net(nn);
retval = nfsd_reply_cache_init(nn);
if (retval)
goto out_cache_error;
@@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;
out_cache_error:
- nfsd4_leases_net_shutdown(nn);
-out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
@@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
- nfsd4_leases_net_shutdown(nn);
}
static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 93b42ef9ed91..fa0144a74267 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void);
extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
#endif
-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
+extern void nfsd4_init_leases_net(struct nfsd_net *nn);
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
@@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
return 0;
}
-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
+static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a5570cf75f3f..9744443c3965 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c852ae8eaf37..8f9c82d9e075 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire,
)
);
-TRACE_EVENT(nfsd_file_create,
- TP_PROTO(
- const struct svc_rqst *rqstp,
- unsigned int may_flags,
- const struct nfsd_file *nf
- ),
-
- TP_ARGS(rqstp, may_flags, nf),
-
- TP_STRUCT__entry(
- __field(const void *, nf_inode)
- __field(const void *, nf_file)
- __field(unsigned long, may_flags)
- __field(unsigned long, nf_flags)
- __field(unsigned long, nf_may)
- __field(unsigned int, nf_ref)
- __field(u32, xid)
- ),
-
- TP_fast_assign(
- __entry->nf_inode = nf->nf_inode;
- __entry->nf_file = nf->nf_file;
- __entry->may_flags = may_flags;
- __entry->nf_flags = nf->nf_flags;
- __entry->nf_may = nf->nf_may;
- __entry->nf_ref = refcount_read(&nf->nf_ref);
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
- ),
-
- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
- __entry->xid, __entry->nf_inode,
- show_nfsd_may_flags(__entry->may_flags),
- __entry->nf_ref, show_nf_flags(__entry->nf_flags),
- show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
- )
-);
-
TRACE_EVENT(nfsd_file_insert_err,
TP_PROTO(
const struct svc_rqst *rqstp,
@@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err,
)
);
-TRACE_EVENT(nfsd_file_open,
- TP_PROTO(struct nfsd_file *nf, __be32 status),
+DECLARE_EVENT_CLASS(nfsd_file_open_class,
+ TP_PROTO(const struct nfsd_file *nf, __be32 status),
TP_ARGS(nf, status),
TP_STRUCT__entry(
__field(void *, nf_inode) /* cannot be dereferenced */
@@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open,
__entry->nf_file)
)
+#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
+DEFINE_EVENT(nfsd_file_open_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf, \
+ __be32 status \
+ ), \
+ TP_ARGS(nf, status))
+
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
+
TRACE_EVENT(nfsd_file_is_cached,
TP_PROTO(
const struct inode *inode,
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b9d15c3df3cc..40ce92a332fe 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
&submit_ptr);
if (ret) {
- if (ret != -EEXIST)
- return ret;
- goto out_check;
+ if (likely(ret == -EEXIST))
+ goto out_check;
+ if (ret == -ENOENT) {
+ /*
+ * Block address translation failed due to invalid
+ * value of 'ptr'. In this case, return internal code
+ * -EINVAL (broken bmap) to notify bmap layer of fatal
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
if (ra) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 87e1004b606d..b4041d0566a9 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
minseg = range[0] + segbytes - 1;
do_div(minseg, segbytes);
+
+ if (range[1] < 4096)
+ goto out;
+
maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+ if (maxseg < segbytes)
+ goto out;
+
do_div(maxseg, segbytes);
maxseg--;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6edb6e0dd61f..1422b8ba24ed 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -409,6 +409,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
goto out;
/*
+ * Prevent underflow in second superblock position calculation.
+ * The exact minimum size check is done in nilfs_sufile_resize().
+ */
+ if (newsize < 4096) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
* Write lock is required to protect some functions depending
* on the number of segments, the number of reserved segments,
* and so forth.
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2064e6473d30..3a4c9c150cbf 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -544,9 +544,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
- u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev));
+ u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev);
int valid[2], swp = 0;
+ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
+ nilfs_err(sb, "device size too small");
+ return -EINVAL;
+ }
+ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
+
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 6e4e65ee050d..c14e90764e35 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -792,7 +792,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (!c->metacopy && c->stat.size) {
err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size);
if (err)
- return err;
+ goto out_fput;
}
err = ovl_copy_up_metadata(c, temp);
@@ -1011,6 +1011,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
+ if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) ||
+ !kgid_has_mapping(current_user_ns(), ctx.stat.gid))
+ return -EOVERFLOW;
+
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
if (parent) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e35a0398db63..af1c49ae11b1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
page = pfn_swap_entry_to_page(swpent);
}
if (page) {
- int mapcount = page_mapcount(page);
-
- if (mapcount >= 2)
+ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b3fdc8212c5f..95f8e8901768 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
+#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 659082e9e51d..72f6f4b37863 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -63,7 +63,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
- int xattr_ids;
+ unsigned int xattr_ids;
unsigned int ids;
bool panic_on_errors;
const struct squashfs_decompressor_thread_ops *thread_ops;
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index d8a270d3ac4c..f1a463d8bfa0 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -10,12 +10,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
- u64 *, int *);
+ u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
- u64 start, u64 *xattr_table_start, int *xattr_ids)
+ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 087cab8c78f4..c8469c656e0d 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
- u64 *xattr_table_start, int *xattr_ids)
+ u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 98ac37e34e3d..cc694846617a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
return ctx->features & UFFD_FEATURE_INITIALIZED;
}
+static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP;
+
+ vma->vm_flags = flags;
+ /*
+ * For shared mappings, we want to enable writenotify while
+ * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
+ * recalculate vma->vm_page_prot whenever userfaultfd-wp changes.
+ */
+ if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed)
+ vma_set_page_prot(vma);
+}
+
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
@@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
for_each_vma(vmi, vma) {
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma,
+ vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
mmap_write_unlock(mm);
@@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
octx = vma->vm_userfaultfd_ctx.ctx;
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
return 0;
}
@@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
} else {
/* Drop uffd context if remap feature not enabled */
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
@@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
prev = vma;
}
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
mmap_write_unlock(mm);
@@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx.ctx = ctx;
if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
@@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
skip:
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 2c53fbb8d918..a9c5c3f720ad 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(inode, zone,
false, false);
}
+ } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
+ data_size > isize) {
+ /* Do not expose garbage data */
+ data_size = isize;
}
/*
@@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
ret = submit_bio_wait(bio);
+ /*
+ * If the file zone was written underneath the file system, the zone
+ * write pointer may not be where we expect it to be, but the zone
+ * append write can still succeed. So check manually that we wrote where
+ * we intended to, that is, at zi->i_wpoffset.
+ */
+ if (!ret) {
+ sector_t wpsector =
+ zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
+
+ if (bio->bi_iter.bi_sector != wpsector) {
+ zonefs_warn(inode->i_sb,
+ "Corrupted write pointer %llu for zone at %llu\n",
+ wpsector, zi->i_zsector);
+ ret = -EIO;
+ }
+ }
+
zonefs_file_write_dio_end_io(iocb, size, ret, 0);
trace_zonefs_file_dio_append(inode, size, ret);