aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig9
-rw-r--r--fs/Makefile4
-rw-r--r--fs/afs/dir.c3
-rw-r--r--fs/btrfs/backref.c19
-rw-r--r--fs/btrfs/backref.h6
-rw-r--r--fs/btrfs/bio.c6
-rw-r--r--fs/btrfs/block-group.c14
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/ctree.c38
-rw-r--r--fs/btrfs/disk-io.c36
-rw-r--r--fs/btrfs/file-item.c9
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/free-space-tree.c50
-rw-r--r--fs/btrfs/free-space-tree.h3
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c57
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/btrfs/zoned.c17
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/snap.c17
-rw-r--r--fs/coredump.c4
-rw-r--r--fs/erofs/Kconfig1
-rw-r--r--fs/erofs/Makefile4
-rw-r--r--fs/erofs/internal.h13
-rw-r--r--fs/erofs/xattr.c2
-rw-r--r--fs/erofs/zdata.c2
-rw-r--r--fs/ext4/balloc.c43
-rw-r--r--fs/ext4/ext4.h44
-rw-r--r--fs/ext4/extents_status.c30
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c6
-rw-r--r--fs/ext4/ialloc.c12
-rw-r--r--fs/ext4/inline.c17
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/mballoc.c86
-rw-r--r--fs/ext4/migrate.c11
-rw-r--r--fs/ext4/mmp.c30
-rw-r--r--fs/ext4/namei.c53
-rw-r--r--fs/ext4/super.c55
-rw-r--r--fs/ext4/xattr.c52
-rw-r--r--fs/gfs2/file.c17
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfsd/nfsctl.c32
-rw-r--r--fs/nfsd/trace.h6
-rw-r--r--fs/nfsd/vfs.c10
-rw-r--r--fs/nilfs2/inode.c18
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c11
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/smb/Kconfig11
-rw-r--r--fs/smb/Makefile5
-rw-r--r--fs/smb/client/Kconfig (renamed from fs/cifs/Kconfig)0
-rw-r--r--fs/smb/client/Makefile (renamed from fs/cifs/Makefile)0
-rw-r--r--fs/smb/client/asn1.c (renamed from fs/cifs/asn1.c)0
-rw-r--r--fs/smb/client/cached_dir.c (renamed from fs/cifs/cached_dir.c)0
-rw-r--r--fs/smb/client/cached_dir.h (renamed from fs/cifs/cached_dir.h)0
-rw-r--r--fs/smb/client/cifs_debug.c (renamed from fs/cifs/cifs_debug.c)8
-rw-r--r--fs/smb/client/cifs_debug.h (renamed from fs/cifs/cifs_debug.h)0
-rw-r--r--fs/smb/client/cifs_dfs_ref.c (renamed from fs/cifs/cifs_dfs_ref.c)0
-rw-r--r--fs/smb/client/cifs_fs_sb.h (renamed from fs/cifs/cifs_fs_sb.h)0
-rw-r--r--fs/smb/client/cifs_ioctl.h (renamed from fs/cifs/cifs_ioctl.h)0
-rw-r--r--fs/smb/client/cifs_spnego.c (renamed from fs/cifs/cifs_spnego.c)0
-rw-r--r--fs/smb/client/cifs_spnego.h (renamed from fs/cifs/cifs_spnego.h)0
-rw-r--r--fs/smb/client/cifs_spnego_negtokeninit.asn1 (renamed from fs/cifs/cifs_spnego_negtokeninit.asn1)0
-rw-r--r--fs/smb/client/cifs_swn.c (renamed from fs/cifs/cifs_swn.c)0
-rw-r--r--fs/smb/client/cifs_swn.h (renamed from fs/cifs/cifs_swn.h)0
-rw-r--r--fs/smb/client/cifs_unicode.c (renamed from fs/cifs/cifs_unicode.c)0
-rw-r--r--fs/smb/client/cifs_unicode.h (renamed from fs/cifs/cifs_unicode.h)0
-rw-r--r--fs/smb/client/cifs_uniupr.h (renamed from fs/cifs/cifs_uniupr.h)0
-rw-r--r--fs/smb/client/cifsacl.c (renamed from fs/cifs/cifsacl.c)0
-rw-r--r--fs/smb/client/cifsacl.h (renamed from fs/cifs/cifsacl.h)0
-rw-r--r--fs/smb/client/cifsencrypt.c (renamed from fs/cifs/cifsencrypt.c)2
-rw-r--r--fs/smb/client/cifsfs.c (renamed from fs/cifs/cifsfs.c)18
-rw-r--r--fs/smb/client/cifsfs.h (renamed from fs/cifs/cifsfs.h)0
-rw-r--r--fs/smb/client/cifsglob.h (renamed from fs/cifs/cifsglob.h)6
-rw-r--r--fs/smb/client/cifspdu.h (renamed from fs/cifs/cifspdu.h)2
-rw-r--r--fs/smb/client/cifsproto.h (renamed from fs/cifs/cifsproto.h)0
-rw-r--r--fs/smb/client/cifsroot.c (renamed from fs/cifs/cifsroot.c)0
-rw-r--r--fs/smb/client/cifssmb.c (renamed from fs/cifs/cifssmb.c)0
-rw-r--r--fs/smb/client/connect.c (renamed from fs/cifs/connect.c)7
-rw-r--r--fs/smb/client/dfs.c (renamed from fs/cifs/dfs.c)2
-rw-r--r--fs/smb/client/dfs.h (renamed from fs/cifs/dfs.h)0
-rw-r--r--fs/smb/client/dfs_cache.c (renamed from fs/cifs/dfs_cache.c)0
-rw-r--r--fs/smb/client/dfs_cache.h (renamed from fs/cifs/dfs_cache.h)0
-rw-r--r--fs/smb/client/dir.c (renamed from fs/cifs/dir.c)0
-rw-r--r--fs/smb/client/dns_resolve.c (renamed from fs/cifs/dns_resolve.c)0
-rw-r--r--fs/smb/client/dns_resolve.h (renamed from fs/cifs/dns_resolve.h)0
-rw-r--r--fs/smb/client/export.c (renamed from fs/cifs/export.c)0
-rw-r--r--fs/smb/client/file.c (renamed from fs/cifs/file.c)29
-rw-r--r--fs/smb/client/fs_context.c (renamed from fs/cifs/fs_context.c)8
-rw-r--r--fs/smb/client/fs_context.h (renamed from fs/cifs/fs_context.h)0
-rw-r--r--fs/smb/client/fscache.c (renamed from fs/cifs/fscache.c)0
-rw-r--r--fs/smb/client/fscache.h (renamed from fs/cifs/fscache.h)0
-rw-r--r--fs/smb/client/inode.c (renamed from fs/cifs/inode.c)0
-rw-r--r--fs/smb/client/ioctl.c (renamed from fs/cifs/ioctl.c)6
-rw-r--r--fs/smb/client/link.c (renamed from fs/cifs/link.c)0
-rw-r--r--fs/smb/client/misc.c (renamed from fs/cifs/misc.c)0
-rw-r--r--fs/smb/client/netlink.c (renamed from fs/cifs/netlink.c)0
-rw-r--r--fs/smb/client/netlink.h (renamed from fs/cifs/netlink.h)0
-rw-r--r--fs/smb/client/netmisc.c (renamed from fs/cifs/netmisc.c)0
-rw-r--r--fs/smb/client/nterr.c (renamed from fs/cifs/nterr.c)0
-rw-r--r--fs/smb/client/nterr.h (renamed from fs/cifs/nterr.h)0
-rw-r--r--fs/smb/client/ntlmssp.h (renamed from fs/cifs/ntlmssp.h)0
-rw-r--r--fs/smb/client/readdir.c (renamed from fs/cifs/readdir.c)0
-rw-r--r--fs/smb/client/rfc1002pdu.h (renamed from fs/cifs/rfc1002pdu.h)0
-rw-r--r--fs/smb/client/sess.c (renamed from fs/cifs/sess.c)0
-rw-r--r--fs/smb/client/smb1ops.c (renamed from fs/cifs/smb1ops.c)9
-rw-r--r--fs/smb/client/smb2file.c (renamed from fs/cifs/smb2file.c)0
-rw-r--r--fs/smb/client/smb2glob.h (renamed from fs/cifs/smb2glob.h)0
-rw-r--r--fs/smb/client/smb2inode.c (renamed from fs/cifs/smb2inode.c)0
-rw-r--r--fs/smb/client/smb2maperror.c (renamed from fs/cifs/smb2maperror.c)0
-rw-r--r--fs/smb/client/smb2misc.c (renamed from fs/cifs/smb2misc.c)0
-rw-r--r--fs/smb/client/smb2ops.c (renamed from fs/cifs/smb2ops.c)10
-rw-r--r--fs/smb/client/smb2pdu.c (renamed from fs/cifs/smb2pdu.c)5
-rw-r--r--fs/smb/client/smb2pdu.h (renamed from fs/cifs/smb2pdu.h)0
-rw-r--r--fs/smb/client/smb2proto.h (renamed from fs/cifs/smb2proto.h)0
-rw-r--r--fs/smb/client/smb2status.h (renamed from fs/cifs/smb2status.h)0
-rw-r--r--fs/smb/client/smb2transport.c (renamed from fs/cifs/smb2transport.c)0
-rw-r--r--fs/smb/client/smbdirect.c (renamed from fs/cifs/smbdirect.c)0
-rw-r--r--fs/smb/client/smbdirect.h (renamed from fs/cifs/smbdirect.h)0
-rw-r--r--fs/smb/client/smbencrypt.c (renamed from fs/cifs/smbencrypt.c)2
-rw-r--r--fs/smb/client/smberr.h (renamed from fs/cifs/smberr.h)0
-rw-r--r--fs/smb/client/trace.c (renamed from fs/cifs/trace.c)0
-rw-r--r--fs/smb/client/trace.h (renamed from fs/cifs/trace.h)0
-rw-r--r--fs/smb/client/transport.c (renamed from fs/cifs/transport.c)0
-rw-r--r--fs/smb/client/unc.c (renamed from fs/cifs/unc.c)0
-rw-r--r--fs/smb/client/winucase.c (renamed from fs/cifs/winucase.c)0
-rw-r--r--fs/smb/client/xattr.c (renamed from fs/cifs/xattr.c)0
-rw-r--r--fs/smb/common/Makefile (renamed from fs/smbfs_common/Makefile)4
-rw-r--r--fs/smb/common/arc4.h (renamed from fs/smbfs_common/arc4.h)0
-rw-r--r--fs/smb/common/cifs_arc4.c (renamed from fs/smbfs_common/cifs_arc4.c)0
-rw-r--r--fs/smb/common/cifs_md4.c (renamed from fs/smbfs_common/cifs_md4.c)0
-rw-r--r--fs/smb/common/md4.h (renamed from fs/smbfs_common/md4.h)0
-rw-r--r--fs/smb/common/smb2pdu.h (renamed from fs/smbfs_common/smb2pdu.h)0
-rw-r--r--fs/smb/common/smbfsctl.h (renamed from fs/smbfs_common/smbfsctl.h)0
-rw-r--r--fs/smb/server/Kconfig (renamed from fs/ksmbd/Kconfig)0
-rw-r--r--fs/smb/server/Makefile (renamed from fs/ksmbd/Makefile)0
-rw-r--r--fs/smb/server/asn1.c (renamed from fs/ksmbd/asn1.c)0
-rw-r--r--fs/smb/server/asn1.h (renamed from fs/ksmbd/asn1.h)0
-rw-r--r--fs/smb/server/auth.c (renamed from fs/ksmbd/auth.c)2
-rw-r--r--fs/smb/server/auth.h (renamed from fs/ksmbd/auth.h)0
-rw-r--r--fs/smb/server/connection.c (renamed from fs/ksmbd/connection.c)3
-rw-r--r--fs/smb/server/connection.h (renamed from fs/ksmbd/connection.h)0
-rw-r--r--fs/smb/server/crypto_ctx.c (renamed from fs/ksmbd/crypto_ctx.c)0
-rw-r--r--fs/smb/server/crypto_ctx.h (renamed from fs/ksmbd/crypto_ctx.h)0
-rw-r--r--fs/smb/server/glob.h (renamed from fs/ksmbd/glob.h)0
-rw-r--r--fs/smb/server/ksmbd_netlink.h (renamed from fs/ksmbd/ksmbd_netlink.h)0
-rw-r--r--fs/smb/server/ksmbd_spnego_negtokeninit.asn1 (renamed from fs/ksmbd/ksmbd_spnego_negtokeninit.asn1)0
-rw-r--r--fs/smb/server/ksmbd_spnego_negtokentarg.asn1 (renamed from fs/ksmbd/ksmbd_spnego_negtokentarg.asn1)0
-rw-r--r--fs/smb/server/ksmbd_work.c (renamed from fs/ksmbd/ksmbd_work.c)0
-rw-r--r--fs/smb/server/ksmbd_work.h (renamed from fs/ksmbd/ksmbd_work.h)0
-rw-r--r--fs/smb/server/mgmt/ksmbd_ida.c (renamed from fs/ksmbd/mgmt/ksmbd_ida.c)0
-rw-r--r--fs/smb/server/mgmt/ksmbd_ida.h (renamed from fs/ksmbd/mgmt/ksmbd_ida.h)0
-rw-r--r--fs/smb/server/mgmt/share_config.c (renamed from fs/ksmbd/mgmt/share_config.c)0
-rw-r--r--fs/smb/server/mgmt/share_config.h (renamed from fs/ksmbd/mgmt/share_config.h)0
-rw-r--r--fs/smb/server/mgmt/tree_connect.c (renamed from fs/ksmbd/mgmt/tree_connect.c)0
-rw-r--r--fs/smb/server/mgmt/tree_connect.h (renamed from fs/ksmbd/mgmt/tree_connect.h)0
-rw-r--r--fs/smb/server/mgmt/user_config.c (renamed from fs/ksmbd/mgmt/user_config.c)0
-rw-r--r--fs/smb/server/mgmt/user_config.h (renamed from fs/ksmbd/mgmt/user_config.h)0
-rw-r--r--fs/smb/server/mgmt/user_session.c (renamed from fs/ksmbd/mgmt/user_session.c)0
-rw-r--r--fs/smb/server/mgmt/user_session.h (renamed from fs/ksmbd/mgmt/user_session.h)0
-rw-r--r--fs/smb/server/misc.c (renamed from fs/ksmbd/misc.c)0
-rw-r--r--fs/smb/server/misc.h (renamed from fs/ksmbd/misc.h)0
-rw-r--r--fs/smb/server/ndr.c (renamed from fs/ksmbd/ndr.c)0
-rw-r--r--fs/smb/server/ndr.h (renamed from fs/ksmbd/ndr.h)0
-rw-r--r--fs/smb/server/nterr.h (renamed from fs/ksmbd/nterr.h)0
-rw-r--r--fs/smb/server/ntlmssp.h (renamed from fs/ksmbd/ntlmssp.h)0
-rw-r--r--fs/smb/server/oplock.c (renamed from fs/ksmbd/oplock.c)77
-rw-r--r--fs/smb/server/oplock.h (renamed from fs/ksmbd/oplock.h)2
-rw-r--r--fs/smb/server/server.c (renamed from fs/ksmbd/server.c)0
-rw-r--r--fs/smb/server/server.h (renamed from fs/ksmbd/server.h)0
-rw-r--r--fs/smb/server/smb2misc.c (renamed from fs/ksmbd/smb2misc.c)5
-rw-r--r--fs/smb/server/smb2ops.c (renamed from fs/ksmbd/smb2ops.c)0
-rw-r--r--fs/smb/server/smb2pdu.c (renamed from fs/ksmbd/smb2pdu.c)115
-rw-r--r--fs/smb/server/smb2pdu.h (renamed from fs/ksmbd/smb2pdu.h)0
-rw-r--r--fs/smb/server/smb_common.c (renamed from fs/ksmbd/smb_common.c)0
-rw-r--r--fs/smb/server/smb_common.h (renamed from fs/ksmbd/smb_common.h)2
-rw-r--r--fs/smb/server/smbacl.c (renamed from fs/ksmbd/smbacl.c)0
-rw-r--r--fs/smb/server/smbacl.h (renamed from fs/ksmbd/smbacl.h)0
-rw-r--r--fs/smb/server/smbfsctl.h (renamed from fs/ksmbd/smbfsctl.h)2
-rw-r--r--fs/smb/server/smbstatus.h (renamed from fs/ksmbd/smbstatus.h)2
-rw-r--r--fs/smb/server/transport_ipc.c (renamed from fs/ksmbd/transport_ipc.c)0
-rw-r--r--fs/smb/server/transport_ipc.h (renamed from fs/ksmbd/transport_ipc.h)0
-rw-r--r--fs/smb/server/transport_rdma.c (renamed from fs/ksmbd/transport_rdma.c)0
-rw-r--r--fs/smb/server/transport_rdma.h (renamed from fs/ksmbd/transport_rdma.h)0
-rw-r--r--fs/smb/server/transport_tcp.c (renamed from fs/ksmbd/transport_tcp.c)0
-rw-r--r--fs/smb/server/transport_tcp.h (renamed from fs/ksmbd/transport_tcp.h)0
-rw-r--r--fs/smb/server/unicode.c (renamed from fs/ksmbd/unicode.c)0
-rw-r--r--fs/smb/server/unicode.h (renamed from fs/ksmbd/unicode.h)0
-rw-r--r--fs/smb/server/uniupr.h (renamed from fs/ksmbd/uniupr.h)0
-rw-r--r--fs/smb/server/vfs.c (renamed from fs/ksmbd/vfs.c)9
-rw-r--r--fs/smb/server/vfs.h (renamed from fs/ksmbd/vfs.h)0
-rw-r--r--fs/smb/server/vfs_cache.c (renamed from fs/ksmbd/vfs_cache.c)0
-rw-r--r--fs/smb/server/vfs_cache.h (renamed from fs/ksmbd/vfs_cache.h)0
-rw-r--r--fs/smb/server/xattr.h (renamed from fs/ksmbd/xattr.h)0
-rw-r--r--fs/statfs.c4
-rw-r--r--fs/xattr.c15
-rw-r--r--fs/xfs/libxfs/xfs_ag.c24
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c91
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c15
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c7
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c24
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c13
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c113
-rw-r--r--fs/xfs/scrub/bmap.c29
-rw-r--r--fs/xfs/scrub/common.c26
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/fscounters.c13
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/scrub.h9
-rw-r--r--fs/xfs/scrub/trace.h1
-rw-r--r--fs/xfs/xfs_bmap_util.c4
-rw-r--r--fs/xfs/xfs_buf_item.c88
-rw-r--r--fs/xfs/xfs_filestream.c1
-rw-r--r--fs/xfs/xfs_icache.c86
-rw-r--r--fs/xfs/xfs_icache.h4
-rw-r--r--fs/xfs/xfs_inode.c20
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c149
-rw-r--r--fs/xfs/xfs_inode_item.h1
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_log_recover.c19
-rw-r--r--fs/xfs/xfs_mount.h4
-rw-r--r--fs/xfs/xfs_reflink.c4
-rw-r--r--fs/xfs/xfs_super.c4
-rw-r--r--fs/xfs/xfs_trans.c9
236 files changed, 1426 insertions, 715 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index cc07a0cd3172..18d034ec7953 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -368,14 +368,7 @@ config NFS_V4_2_SSC_HELPER
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
-source "fs/cifs/Kconfig"
-source "fs/ksmbd/Kconfig"
-
-config SMBFS_COMMON
- tristate
- default y if CIFS=y || SMB_SERVER=y
- default m if CIFS=m || SMB_SERVER=m
-
+source "fs/smb/Kconfig"
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
source "fs/9p/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 834f1c3dba46..5bfdbf0d7037 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -95,9 +95,7 @@ obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-y += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
-obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/
-obj-$(CONFIG_CIFS) += cifs/
-obj-$(CONFIG_SMB_SERVER) += ksmbd/
+obj-$(CONFIG_SMBFS) += smb/
obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_NTFS3_FS) += ntfs3/
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4dd97afa536c..5219182e52e1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1358,6 +1358,7 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFDIR | mode;
op->create.reason = afs_edit_dir_for_mkdir;
+ op->mtime = current_time(dir);
op->ops = &afs_mkdir_operation;
return afs_do_sync_operation(op);
}
@@ -1661,6 +1662,7 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFREG | mode;
op->create.reason = afs_edit_dir_for_create;
+ op->mtime = current_time(dir);
op->ops = &afs_create_operation;
return afs_do_sync_operation(op);
@@ -1796,6 +1798,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->ops = &afs_symlink_operation;
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
+ op->mtime = current_time(dir);
return afs_do_sync_operation(op);
error:
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e54f0884802a..79336fa853db 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -45,7 +45,8 @@ static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
int root_count;
bool cached;
- if (!btrfs_file_extent_compression(eb, fi) &&
+ if (!ctx->ignore_extent_item_pos &&
+ !btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
@@ -552,7 +553,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
count++;
else
goto next;
- if (!ctx->ignore_extent_item_pos) {
+ if (!ctx->skip_inode_ref_list) {
ret = check_extent_in_eb(ctx, &key, eb, fi, &eie);
if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP ||
ret < 0)
@@ -564,7 +565,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
eie, (void **)&old, GFP_NOFS);
if (ret < 0)
break;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
while (old->next)
old = old->next;
old->next = eie;
@@ -1606,7 +1607,7 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- if (!ctx->ignore_extent_item_pos && !ref->inode_list &&
+ if (!ctx->skip_inode_ref_list && !ref->inode_list &&
ref->level == 0) {
struct btrfs_tree_parent_check check = { 0 };
struct extent_buffer *eb;
@@ -1647,7 +1648,7 @@ again:
(void **)&eie, GFP_NOFS);
if (ret < 0)
goto out;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
/*
* We've recorded that parent, so we must extend
* its inode list here.
@@ -1743,7 +1744,7 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
{
const u64 orig_bytenr = ctx->bytenr;
- const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos;
+ const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
bool roots_ulist_allocated = false;
struct ulist_iterator uiter;
int ret = 0;
@@ -1764,7 +1765,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
roots_ulist_allocated = true;
}
- ctx->ignore_extent_item_pos = true;
+ ctx->skip_inode_ref_list = true;
ULIST_ITER_INIT(&uiter);
while (1) {
@@ -1789,7 +1790,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
ulist_free(ctx->refs);
ctx->refs = NULL;
ctx->bytenr = orig_bytenr;
- ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos;
+ ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
return ret;
}
@@ -1912,7 +1913,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
goto out_trans;
}
- walk_ctx.ignore_extent_item_pos = true;
+ walk_ctx.skip_inode_ref_list = true;
walk_ctx.trans = trans;
walk_ctx.fs_info = fs_info;
walk_ctx.refs = &ctx->refs;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ef6bbea3f456..1616e3e3f1e4 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -60,6 +60,12 @@ struct btrfs_backref_walk_ctx {
* @extent_item_pos is ignored.
*/
bool ignore_extent_item_pos;
+ /*
+ * If true and bytenr corresponds to a data extent, then the inode list
+ * (each member describing inode number, file offset and root) is not
+ * added to each reference added to the @refs ulist.
+ */
+ bool skip_inode_ref_list;
/* A valid transaction handle or NULL. */
struct btrfs_trans_handle *trans;
/*
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5379c4714905..b3ad0f51e616 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -330,7 +330,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
- bbio->end_io(bbio);
+ btrfs_orig_bbio_end_io(bbio);
}
static void btrfs_simple_end_io(struct bio *bio)
@@ -811,10 +811,6 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
goto fail;
if (dev_replace) {
- if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
- bbio->bio.bi_opf &= ~REQ_OP_WRITE;
- bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
- }
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 957ad1c31c4f..590b03560265 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2818,10 +2818,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
}
ret = inc_block_group_ro(cache, 0);
- if (!do_chunk_alloc || ret == -ETXTBSY)
- goto unlock_out;
if (!ret)
goto out;
+ if (ret == -ETXTBSY)
+ goto unlock_out;
+
+ /*
+ * Skip chunk alloction if the bg is SYSTEM, this is to avoid system
+ * chunk allocation storm to exhaust the system chunk array. Otherwise
+ * we still want to try our best to mark the block group read-only.
+ */
+ if (!do_chunk_alloc && ret == -ENOSPC &&
+ (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+ goto unlock_out;
+
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 3ab707e26fa2..ac18c43fadad 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -124,7 +124,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ if (qgroup_to_release_ret &&
+ block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
block_rsv->qgroup_rsv_size;
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3c983c70028a..2ff2961b1183 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2627,6 +2627,10 @@ static bool check_sibling_keys(struct extent_buffer *left,
}
if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+ btrfs_crit(left->fs_info, "left extent buffer:");
+ btrfs_print_tree(left, false);
+ btrfs_crit(left->fs_info, "right extent buffer:");
+ btrfs_print_tree(right, false);
btrfs_crit(left->fs_info,
"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
left_last.objectid, left_last.type,
@@ -3215,6 +3219,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
@@ -3433,6 +3438,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
return __push_leaf_left(trans, path, min_data_size, empty, left,
@@ -4478,10 +4484,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -4498,8 +4506,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59ea049fe7ee..2b1b227505f3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -96,7 +96,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
- for (i = 1; i < num_pages; i++) {
+ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
kaddr = page_address(buf->pages[i]);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
}
@@ -3121,23 +3121,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
{
int ret;
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
- bool clear_free_space_tree = false;
+ bool rebuild_free_space_tree = false;
if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
btrfs_warn(fs_info, "free space tree is invalid");
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
}
- if (clear_free_space_tree) {
- btrfs_info(fs_info, "clearing free space tree");
- ret = btrfs_clear_free_space_tree(fs_info);
+ if (rebuild_free_space_tree) {
+ btrfs_info(fs_info, "rebuilding free space tree");
+ ret = btrfs_rebuild_free_space_tree(fs_info);
if (ret) {
btrfs_warn(fs_info,
- "failed to clear free space tree: %d", ret);
+ "failed to rebuild free space tree: %d", ret);
+ goto out;
+ }
+ }
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "disabling free space tree");
+ ret = btrfs_delete_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to disable free space tree: %d", ret);
goto out;
}
}
@@ -4925,7 +4936,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
iput(inode);
}
spin_lock(&root->delalloc_lock);
@@ -5031,7 +5046,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
inode = cache->io_ctl.inode;
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
+
BTRFS_I(inode)->generation = 0;
cache->io_ctl.inode = NULL;
iput(inode);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 018c711a0bc8..d1cd0a692f8e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -52,13 +52,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 start, end, i_size;
int ret;
+ spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
inode->disk_i_size = i_size;
- return;
+ goto out_unlock;
}
- spin_lock(&inode->lock);
ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
&end, EXTENT_DIRTY);
if (!ret && start == 0)
@@ -66,6 +66,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
else
i_size = 0;
inode->disk_i_size = i_size;
+out_unlock:
spin_unlock(&inode->lock);
}
@@ -791,7 +792,9 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
bytes_left), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
- BUG_ON(!sums); /* -ENOMEM */
+ if (!sums)
+ return BLK_STS_RESOURCE;
+
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
offset);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d84cef89cdff..cf98a3c05480 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -870,15 +870,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- recalculate_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ recalculate_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 4d155a48ec59..b21da1446f2a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1252,7 +1252,7 @@ out:
return ret;
}
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1298,6 +1298,54 @@ abort:
return ret;
}
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (ret)
+ goto abort;
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *block_group;
+
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+ ret = populate_free_space_tree(trans, block_group);
+ if (ret)
+ goto abort;
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+ ret = btrfs_commit_transaction(trans);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+abort:
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+}
+
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index dc2463e4cfe3..6d5551d0ced8 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -18,7 +18,8 @@ struct btrfs_caching_control;
void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57d070025c7a..19c707bc8801 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3108,6 +3108,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
+ } else if (btrfs_is_data_reloc_root(inode->root)) {
+ btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
}
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 25833b4eeaf5..2fa36f694daa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -454,7 +454,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
case BTRFS_EXCLOP_BALANCE_PAUSED:
spin_lock(&fs_info->super_lock);
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+ fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
spin_unlock(&fs_info->super_lock);
break;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index b93c96213304..497b9dbd8a13 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -151,10 +151,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 09b1988d1791..59a06499c647 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3422,7 +3422,7 @@ int add_data_references(struct reloc_control *rc,
btrfs_release_path(path);
ctx.bytenr = extent_key->objectid;
- ctx.ignore_extent_item_pos = true;
+ ctx.skip_inode_ref_list = true;
ctx.fs_info = rc->extent_root->fs_info;
ret = btrfs_find_all_leafs(&ctx);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661..7c666517d3d3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1137,6 +1137,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
wake_up(&stripe->io_wait);
}
+static void scrub_submit_write_bio(struct scrub_ctx *sctx,
+ struct scrub_stripe *stripe,
+ struct btrfs_bio *bbio, bool dev_replace)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u32 bio_len = bbio->bio.bi_iter.bi_size;
+ u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) -
+ stripe->logical;
+
+ fill_writer_pointer_gap(sctx, stripe->physical + bio_off);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
+ if (!btrfs_is_zoned(fs_info))
+ return;
+ /*
+ * For zoned writeback, queue depth must be 1, thus we must wait for
+ * the write to finish before the next write.
+ */
+ wait_scrub_stripe_io(stripe);
+
+ /*
+ * And also need to update the write pointer if write finished
+ * successfully.
+ */
+ if (!test_bit(bio_off >> fs_info->sectorsize_bits,
+ &stripe->write_error_bitmap))
+ sctx->write_pointer += bio_len;
+}
+
/*
* Submit the write bio(s) for the sectors specified by @write_bitmap.
*
@@ -1155,7 +1184,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const bool zoned = btrfs_is_zoned(fs_info);
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
@@ -1168,13 +1196,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
- fill_writer_pointer_gap(sctx, stripe->physical +
- (sector_nr << fs_info->sectorsize_bits));
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- /* For zoned writeback, queue depth must be 1. */
- if (zoned)
- wait_scrub_stripe_io(stripe);
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
if (!bbio) {
@@ -1187,14 +1209,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
ASSERT(ret == fs_info->sectorsize);
}
- if (bbio) {
- fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector <<
- SECTOR_SHIFT);
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- if (zoned)
- wait_scrub_stripe_io(stripe);
- }
+ if (bbio)
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
}
/*
@@ -2518,13 +2534,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+ !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
* It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
+ *
+ * For RAID56 chunks, we have to mark them read-only
+ * for scrub, as later we would use our own cache
+ * out of RAID56 realm.
+ * Thus we want the RAID56 bg to be marked RO to
+ * prevent RMW from screwing up out cache.
*/
ro_set = 0;
} else if (ret == -ETXTBSY) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6cb97efee976..ec18e2210602 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -826,7 +826,11 @@ out:
!btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(info, "cannot disable free space tree");
ret = -EINVAL;
-
+ }
+ if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
+ !btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
+ ret = -EINVAL;
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9b212e8c70cc..d2755d5e338b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6158,7 +6158,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
const struct btrfs_delayed_item *curr;
- u64 last_range_start;
+ u64 last_range_start = 0;
u64 last_range_end = 0;
struct btrfs_key key;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03f52e4a20aa..841e799dece5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -395,6 +395,7 @@ void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
+ extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a9b32ba6b2ce..39828af4a4e8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -122,10 +122,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
- u64 bytenr;
-
- bytenr = ((zones[i].start + zones[i].len)
- << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
+ u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
+ u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
+ BTRFS_SUPER_INFO_SIZE;
page[i] = read_cache_page_gfp(mapping,
bytenr >> PAGE_SHIFT, GFP_NOFS);
@@ -1168,12 +1167,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
return -ERANGE;
/* All the zones are conventional */
- if (find_next_bit(zinfo->seq_zones, begin, end) == end)
+ if (find_next_bit(zinfo->seq_zones, end, begin) == end)
return 0;
/* All the zones are sequential and empty */
- if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
- find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
+ if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
+ find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1610,11 +1609,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
!list_empty(&eb->release_list))
return;
+ memzero_extent_buffer(eb, 0, eb->len);
+ set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
set_extent_bits_nowait(&trans->dirty_pages, eb->start,
eb->start + eb->len - 1, EXTENT_DIRTY);
- memzero_extent_buffer(eb, 0, eb->len);
- set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
spin_lock(&trans->releasing_ebs_lock);
list_add_tail(&eb->release_list, &trans->releasing_ebs);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 789be30d6ee2..2321e5ddb664 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
+ bool need_put = false;
int mds;
dout("ceph_flush_snaps %p\n", inode);
@@ -1671,8 +1672,13 @@ out:
ceph_put_mds_session(session);
/* we flushed them all; remove this inode from the queue */
spin_lock(&mdsc->snap_flush_lock);
+ if (!list_empty(&ci->i_snap_flush_item))
+ need_put = true;
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
+
+ if (need_put)
+ iput(inode);
}
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 29cf00220b09..4c0f22acf53d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3942,7 +3942,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
struct dentry *dentry;
struct ceph_cap *cap;
char *path;
- int pathlen = 0, err = 0;
+ int pathlen = 0, err;
u64 pathbase;
u64 snap_follows;
@@ -3965,6 +3965,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ err = 0;
goto out_err;
}
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 87007203f130..2e73ba62bd7a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- if (list_empty(&ci->i_snap_flush_item))
+ if (list_empty(&ci->i_snap_flush_item)) {
+ ihold(inode);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ }
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
@@ -1111,6 +1113,19 @@ skip_inode:
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ /*
+ * In the non-split case both 'num_split_inos' and
+ * 'num_split_realms' should be 0, making this a no-op.
+ * However the MDS happens to populate 'split_realms' list
+ * in one of the UPDATE op cases by mistake.
+ *
+ * Skip both lists just in case to ensure that 'p' is
+ * positioned at the start of realm info, as expected by
+ * ceph_update_snap_trace().
+ */
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
diff --git a/fs/coredump.c b/fs/coredump.c
index ece7badf701b..88740c51b942 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -371,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code)
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
- nr++;
+ /* The vhost_worker does not particpate in coredumps */
+ if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER)
+ nr++;
}
}
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 704fb59577e0..f259d92c9720 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -121,6 +121,7 @@ config EROFS_FS_PCPU_KTHREAD
config EROFS_FS_PCPU_KTHREAD_HIPRI
bool "EROFS high priority per-CPU kthread workers"
depends on EROFS_FS_ZIP && EROFS_FS_PCPU_KTHREAD
+ default y
help
This permits EROFS to configure per-CPU kthread workers to run
at higher priority.
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 99bbc597a3e9..a3a98fc3e481 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index af0431a40647..1e39c03357d1 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -472,12 +472,6 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
return NULL;
}
-void *erofs_get_pcpubuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
-void erofs_pcpubuf_exit(void);
-
int erofs_register_sysfs(struct super_block *sb);
void erofs_unregister_sysfs(struct super_block *sb);
int __init erofs_init_sysfs(void);
@@ -512,6 +506,11 @@ int z_erofs_load_lz4_config(struct super_block *sb,
struct z_erofs_lz4_cfgs *lz4, int len);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags);
+void *erofs_get_pcpubuf(unsigned int requiredpages);
+void erofs_put_pcpubuf(void *ptr);
+int erofs_pcpubuf_growsize(unsigned int nrpages);
+void __init erofs_pcpubuf_init(void);
+void erofs_pcpubuf_exit(void);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -529,6 +528,8 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
return 0;
}
+static inline void erofs_pcpubuf_init(void) {}
+static inline void erofs_pcpubuf_exit(void) {}
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index cd80499351e0..bbfe7ce170d2 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -675,7 +675,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (!pfs)
return -ENOMEM;
- if (erofs_sb_has_fragments(sbi))
+ if (sbi->packed_inode)
buf.inode = sbi->packed_inode;
else
erofs_init_metabuf(&buf, sb);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 45f21db2303a..160b3da43aec 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -369,8 +369,6 @@ static struct kthread_worker *erofs_init_percpu_worker(int cpu)
return worker;
if (IS_ENABLED(CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI))
sched_set_fifo_low(worker->task);
- else
- sched_set_normal(worker->task, 0);
return worker;
}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 094269488183..c1edde817be8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -305,6 +305,38 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ ext4_grpblk_t next_zero_bit;
+ unsigned long bitmap_size = sb->s_blocksize * 8;
+ unsigned int offset = num_clusters_in_group(sb, block_group);
+
+ if (bitmap_size <= offset)
+ return 0;
+
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
+
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+}
+
+struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+ ext4_group_t group)
+{
+ struct ext4_group_info **grp_info;
+ long indexv, indexh;
+
+ if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) {
+ ext4_error(sb, "invalid group %u", group);
+ return NULL;
+ }
+ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+ return grp_info[indexh];
+}
+
/*
* Return the block number which was discovered to be invalid, or 0 if
* the block bitmap is valid.
@@ -379,7 +411,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
if (buffer_verified(bh))
return 0;
- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
@@ -402,6 +434,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
return -EFSCORRUPTED;
}
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
+ block_group, blk);
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
set_buffer_verified(bh);
verified:
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 18cb2680dc39..8104a21b001a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -918,11 +918,13 @@ do { \
* where the second inode has larger inode number
* than the first
* I_DATA_SEM_QUOTA - Used for quota inodes only
+ * I_DATA_SEM_EA - Used for ea_inodes only
*/
enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
+ I_DATA_SEM_EA
};
@@ -1684,6 +1686,30 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
+static inline int ext4_writepages_down_read(struct super_block *sb)
+{
+ percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ return memalloc_nofs_save();
+}
+
+static inline void ext4_writepages_up_read(struct super_block *sb, int ctx)
+{
+ memalloc_nofs_restore(ctx);
+ percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
+}
+
+static inline int ext4_writepages_down_write(struct super_block *sb)
+{
+ percpu_down_write(&EXT4_SB(sb)->s_writepages_rwsem);
+ return memalloc_nofs_save();
+}
+
+static inline void ext4_writepages_up_write(struct super_block *sb, int ctx)
+{
+ memalloc_nofs_restore(ctx);
+ percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem);
+}
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -2625,6 +2651,8 @@ extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
+extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+ ext4_group_t group);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
@@ -2875,7 +2903,8 @@ typedef enum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
- EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */
+ EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
+ EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -3232,19 +3261,6 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
}
-static inline
-struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
- ext4_group_t group)
-{
- struct ext4_group_info **grp_info;
- long indexv, indexh;
- BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
- return grp_info[indexh];
-}
-
/*
* Reading s_groups_count requires using smp_rmb() afterwards. See
* the locking protocol documented in the comments of ext4_group_add()
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 7bc221038c6c..595abb9e7d74 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -267,14 +267,12 @@ static void __es_find_extent_range(struct inode *inode,
/* see if the extent has been cached */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %x\n",
- lblk, es1->es_lblk, es1->es_len,
- ext4_es_pblock(es1), ext4_es_status(es1));
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u) %llu %x\n",
+ lblk, es1->es_lblk, es1->es_len,
+ ext4_es_pblock(es1), ext4_es_status(es1));
+ goto out;
}
es1 = __es_tree_search(&tree->root, lblk);
@@ -293,7 +291,7 @@ out:
}
if (es1 && matching_fn(es1)) {
- tree->cache_es = es1;
+ WRITE_ONCE(tree->cache_es, es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
@@ -931,14 +929,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
/* find extent in cache firstly */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u)\n",
- lblk, es1->es_lblk, es1->es_len);
- found = 1;
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u)\n",
+ lblk, es1->es_lblk, es1->es_len);
+ found = 1;
+ goto out;
}
node = tree->root.rb_node;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index f65fdb27ce14..2a143209aa0c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -108,6 +108,13 @@ static int ext4_fsync_journal(struct inode *inode, bool datasync,
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+ /*
+ * Fastcommit does not really support fsync on directories or other
+ * special files. Force a full commit.
+ */
+ if (!S_ISREG(inode->i_mode))
+ return ext4_force_commit(inode->i_sb);
+
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
*needs_barrier = true;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 147b5241dd94..46c3423ddfa1 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -277,7 +277,11 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len,
}
default:
hinfo->hash = 0;
- return -1;
+ hinfo->minor_hash = 0;
+ ext4_warning(dir->i_sb,
+ "invalid/unsupported hash tree version %u",
+ hinfo->hash_version);
+ return -EINVAL;
}
hash = hash & ~1;
if (hash == (EXT4_HTREE_EOF_32BIT << 1))
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 787ab89c2c26..754f961cd9fd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -91,7 +91,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (buffer_verified(bh))
return 0;
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
@@ -293,7 +293,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
}
if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
grp = ext4_get_group_info(sb, block_group);
- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
+ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
fatal = -EFSCORRUPTED;
goto error_return;
}
@@ -1046,7 +1046,7 @@ got_group:
* Skip groups with already-known suspicious inode
* tables
*/
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
goto next_group;
}
@@ -1183,6 +1183,10 @@ got:
if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
grp = ext4_get_group_info(sb, group);
+ if (!grp) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
down_read(&grp->alloc_sem); /*
* protect vs itable
* lazyinit
@@ -1526,7 +1530,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
- if (!gdp)
+ if (!gdp || !grp)
goto out;
/*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 859bc4e2c9b0..5854bd5a3352 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -34,6 +34,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
+ void *end;
int free, min_offs;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
@@ -57,14 +58,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
/* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ while (!IS_LAST_ENTRY(entry)) {
+ void *next = EXT4_XATTR_NEXT(entry);
+
+ if (next >= end) {
+ EXT4_ERROR_INODE(inode,
+ "corrupt xattr in inline inode");
+ return 0;
+ }
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
+ entry = next;
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
@@ -350,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
- if (error == -ENODATA)
+ if (error < 0)
goto out;
BUFFER_TRACE(is.iloc.bh, "get_write_access");
@@ -1175,6 +1185,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
ext4_initialize_dirent_tail(dir_block,
inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
+ unlock_buffer(dir_block);
err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
@@ -1249,6 +1260,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (!S_ISDIR(inode->i_mode)) {
memcpy(data_bh->b_data, buf, inline_size);
set_buffer_uptodate(data_bh);
+ unlock_buffer(data_bh);
error = ext4_handle_dirty_metadata(handle,
inode, data_bh);
} else {
@@ -1256,7 +1268,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
buf, inline_size);
}
- unlock_buffer(data_bh);
out_restore:
if (error)
ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d5ba922e411..02de439bf1f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2783,11 +2783,12 @@ static int ext4_writepages(struct address_space *mapping,
.can_map = 1,
};
int ret;
+ int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return -EIO;
- percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_read(sb);
ret = ext4_do_writepages(&mpd);
/*
* For data=journal writeback we could have come across pages marked
@@ -2796,7 +2797,7 @@ static int ext4_writepages(struct address_space *mapping,
*/
if (!ret && mpd.journalled_more_data)
ret = ext4_do_writepages(&mpd);
- percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ ext4_writepages_up_read(sb, alloc_ctx);
return ret;
}
@@ -2824,17 +2825,18 @@ static int ext4_dax_writepages(struct address_space *mapping,
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- percpu_down_read(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
- percpu_up_read(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_read(inode->i_sb, alloc_ctx);
return ret;
}
@@ -3375,7 +3377,7 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
*/
flags &= ~IOMAP_WRITE;
ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
- WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
+ WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
return ret;
}
@@ -4639,6 +4641,24 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
inode_set_iversion_queried(inode, val);
}
+static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
+
+{
+ if (flags & EXT4_IGET_EA_INODE) {
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "missing EA_INODE flag";
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
+ EXT4_I(inode)->i_file_acl)
+ return "ea_inode with extended attributes";
+ } else {
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "unexpected EA_INODE flag";
+ }
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
+ return "unexpected bad inode w/o EXT4_IGET_BAD";
+ return NULL;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -4648,6 +4668,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode_info *ei;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct inode *inode;
+ const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4675,8 +4696,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode->i_state & I_NEW)) {
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ iput(inode);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
return inode;
+ }
ei = EXT4_I(inode);
iloc.bh = NULL;
@@ -4942,10 +4969,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
- if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
- ext4_error_inode(inode, function, line, 0,
- "bad inode without EXT4_IGET_BAD flag");
- ret = -EUCLEAN;
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -5928,7 +5954,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
journal_t *journal;
handle_t *handle;
int err;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int alloc_ctx;
/*
* We have to be very careful here: changing a data block's
@@ -5966,7 +5992,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
}
}
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
jbd2_journal_lock_updates(journal);
/*
@@ -5983,7 +6009,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
err = jbd2_journal_flush(journal, 0);
if (err < 0) {
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return err;
}
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
@@ -5991,7 +6017,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal);
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
if (val)
filemap_invalidate_unlock(inode->i_mapping);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 78259bddbc4d..20f67a260df5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -745,6 +745,8 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
grp = ext4_get_group_info(sb, e4b->bd_group);
+ if (!grp)
+ return NULL;
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@@ -1060,9 +1062,9 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
+ void *buddy, void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp)
{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
@@ -1181,6 +1183,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
break;
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo)
+ continue;
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
@@ -1246,6 +1250,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1256,7 +1264,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
ext4_lock_group(sb, group);
/* init the buddy */
memset(data, 0xff, blocksize);
- ext4_mb_generate_buddy(sb, data, incore, group);
+ ext4_mb_generate_buddy(sb, data, incore, group, grinfo);
ext4_unlock_group(sb, group);
incore = NULL;
} else {
@@ -1370,6 +1378,9 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
might_sleep();
mb_debug(sb, "init group %u\n", group);
this_grp = ext4_get_group_info(sb, group);
+ if (!this_grp)
+ return -EFSCORRUPTED;
+
/*
* This ensures that we don't reinit the buddy cache
* page which map to the group from which we are already
@@ -1444,6 +1455,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
blocks_per_page = PAGE_SIZE / sb->s_blocksize;
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ return -EFSCORRUPTED;
e4b->bd_blkbits = sb->s_blocksize_bits;
e4b->bd_info = grp;
@@ -2049,7 +2062,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
if (bex->fe_len < gex->fe_len)
return;
- if (finish_group)
+ if (finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
ext4_mb_use_best_found(ac, e4b);
}
@@ -2061,6 +2074,20 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
* in the context. Later, the best found extent will be used, if
* mballoc can't find good enough extent.
*
+ * The algorithm used is roughly as follows:
+ *
+ * * If free extent found is exactly as big as goal, then
+ * stop the scan and use it immediately
+ *
+ * * If free extent found is smaller than goal, then keep retrying
+ * upto a max of sbi->s_mb_max_to_scan times (default 200). After
+ * that stop scanning and use whatever we have.
+ *
+ * * If free extent found is bigger than goal, then keep retrying
+ * upto a max of sbi->s_mb_min_to_scan times (default 10) before
+ * stopping the scan and using the extent.
+ *
+ *
* FIXME: real allocation policy is to be designed yet!
*/
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
@@ -2159,6 +2186,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
struct ext4_free_extent ex;
+ if (!grp)
+ return -EFSCORRUPTED;
if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY)))
return 0;
if (grp->bb_free == 0)
@@ -2385,7 +2414,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < 0 || cr >= 4);
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp))
return false;
free = grp->bb_free;
@@ -2454,6 +2483,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
ext4_grpblk_t free;
int ret = 0;
+ if (!grp)
+ return -EFSCORRUPTED;
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
if (should_lock) {
@@ -2534,7 +2565,7 @@ ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
* prefetch once, so we avoid getblk() call, which can
* be expensive.
*/
- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
+ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 &&
!(ext4_has_group_desc_csum(sb) &&
@@ -2578,7 +2609,7 @@ void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
gdp = ext4_get_group_desc(sb, group, NULL);
grp = ext4_get_group_info(sb, group);
- if (EXT4_MB_GRP_NEED_INIT(grp) &&
+ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) &&
ext4_free_group_clusters(sb, gdp) > 0 &&
!(ext4_has_group_desc_csum(sb) &&
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
@@ -2837,6 +2868,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
sizeof(struct ext4_group_info);
grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo)
+ return 0;
/* Load the group info in memory only if not already loaded. */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2847,7 +2880,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
buddy_loaded = 1;
}
- memcpy(&sg, ext4_get_group_info(sb, group), i);
+ memcpy(&sg, grinfo, i);
if (buddy_loaded)
ext4_mb_unload_buddy(&e4b);
@@ -3208,8 +3241,12 @@ static int ext4_mb_init_backend(struct super_block *sb)
err_freebuddy:
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
- while (i-- > 0)
- kmem_cache_free(cachep, ext4_get_group_info(sb, i));
+ while (i-- > 0) {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, i);
+
+ if (grp)
+ kmem_cache_free(cachep, grp);
+ }
i = sbi->s_group_info_size;
rcu_read_lock();
group_info = rcu_dereference(sbi->s_group_info);
@@ -3522,6 +3559,8 @@ int ext4_mb_release(struct super_block *sb)
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
+ if (!grinfo)
+ continue;
mb_group_bb_bitmap_free(grinfo);
ext4_lock_group(sb, i);
count = ext4_mb_cleanup_pa(grinfo);
@@ -4606,6 +4645,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
struct ext4_free_data *entry;
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ return;
n = rb_first(&(grp->bb_free_root));
while (n) {
@@ -4633,6 +4674,9 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
int preallocated = 0;
int len;
+ if (!grp)
+ return;
+
/* all form of preallocation discards first load group,
* so the only competing code is preallocation use.
* we don't need any locking here
@@ -4869,6 +4913,8 @@ adjust_bex:
ei = EXT4_I(ac->ac_inode);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
+ if (!grp)
+ return;
pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock;
pa->pa_inode = ac->ac_inode;
@@ -4918,6 +4964,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
+ if (!grp)
+ return;
lg = ac->ac_lg;
BUG_ON(lg == NULL);
@@ -5013,7 +5061,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
+ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
+ e4b->bd_group, group, pa->pa_pstart);
+ return 0;
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
@@ -5043,6 +5095,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
int err;
int free = 0;
+ if (!grp)
+ return 0;
mb_debug(sb, "discard preallocation for group %u\n", group);
if (list_empty(&grp->bb_prealloc_list))
goto out_dbg;
@@ -5297,6 +5351,9 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
struct ext4_prealloc_space *pa;
ext4_grpblk_t start;
struct list_head *cur;
+
+ if (!grp)
+ continue;
ext4_lock_group(sb, i);
list_for_each(cur, &grp->bb_prealloc_list) {
pa = list_entry(cur, struct ext4_prealloc_space,
@@ -6064,6 +6121,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_group_desc *gdp;
+ struct ext4_group_info *grp;
unsigned int overflow;
ext4_grpblk_t bit;
struct buffer_head *gd_bh;
@@ -6089,8 +6147,8 @@ do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
- ext4_get_group_info(sb, block_group))))
+ grp = ext4_get_group_info(sb, block_group);
+ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return;
/*
@@ -6692,6 +6750,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
for (group = first_group; group <= last_group; group++) {
grp = ext4_get_group_info(sb, group);
+ if (!grp)
+ continue;
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
ret = ext4_mb_init_group(sb, group, GFP_NOFS);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a19a9661646e..d98ac2af8199 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -408,7 +408,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
int ext4_ext_migrate(struct inode *inode)
{
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
int retval = 0, i;
__le32 *i_data;
@@ -418,6 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
unsigned long max_entries;
__u32 goal, tmp_csum_seed;
uid_t owner[2];
+ int alloc_ctx;
/*
* If the filesystem does not support extents, or the inode
@@ -434,7 +434,7 @@ int ext4_ext_migrate(struct inode *inode)
*/
return retval;
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
/*
* Worst case we can touch the allocation bitmaps and a block
@@ -586,7 +586,7 @@ out_tmp_inode:
unlock_new_inode(tmp_inode);
iput(tmp_inode);
out_unlock:
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return retval;
}
@@ -605,6 +605,7 @@ int ext4_ind_migrate(struct inode *inode)
ext4_fsblk_t blk;
handle_t *handle;
int ret, ret2 = 0;
+ int alloc_ctx;
if (!ext4_has_feature_extents(inode->i_sb) ||
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
@@ -621,7 +622,7 @@ int ext4_ind_migrate(struct inode *inode)
if (test_opt(inode->i_sb, DELALLOC))
ext4_alloc_da_blocks(inode);
- percpu_down_write(&sbi->s_writepages_rwsem);
+ alloc_ctx = ext4_writepages_down_write(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle)) {
@@ -665,6 +666,6 @@ errout:
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
out_unlock:
- percpu_up_write(&sbi->s_writepages_rwsem);
+ ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return ret;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 4022bc713421..0aaf38ffcb6e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -39,28 +39,36 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
* Write the MMP block using REQ_SYNC to try to get the block on-disk
* faster.
*/
-static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+static int write_mmp_block_thawed(struct super_block *sb,
+ struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
- /*
- * We protect against freezing so that we don't create dirty buffers
- * on frozen filesystem.
- */
- sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
- sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
return -EIO;
-
return 0;
}
+static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
+{
+ int err;
+
+ /*
+ * We protect against freezing so that we don't create dirty buffers
+ * on frozen filesystem.
+ */
+ sb_start_write(sb);
+ err = write_mmp_block_thawed(sb, bh);
+ sb_end_write(sb);
+ return err;
+}
+
/*
* Read the MMP block. It _must_ be read from disk and hence we clear the
* uptodate flag on the buffer.
@@ -344,7 +352,11 @@ skip:
seq = mmp_new_seq();
mmp->mmp_seq = cpu_to_le32(seq);
- retval = write_mmp_block(sb, bh);
+ /*
+ * On mount / remount we are protected against fs freezing (by s_umount
+ * semaphore) and grabbing freeze protection upsets lockdep
+ */
+ retval = write_mmp_block_thawed(sb, bh);
if (retval)
goto failed;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a5010b5b8a8c..45b579805c95 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -674,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
len = de->name_len;
if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
- ext4fs_dirhash(dir, de->name,
+ (void) ext4fs_dirhash(dir, de->name,
de->name_len, &h);
printk("%*.s:(U)%x.%u ", len,
name, h.hash,
@@ -709,8 +709,9 @@ static struct stats dx_show_leaf(struct inode *dir,
if (IS_CASEFOLDED(dir))
h.hash = EXT4_DIRENT_HASH(de);
else
- ext4fs_dirhash(dir, de->name,
- de->name_len, &h);
+ (void) ext4fs_dirhash(dir,
+ de->name,
+ de->name_len, &h);
printk("%*.s:(E)%x.%u ", len, name,
h.hash, (unsigned) ((char *) de
- base));
@@ -720,7 +721,8 @@ static struct stats dx_show_leaf(struct inode *dir,
#else
int len = de->name_len;
char *name = de->name;
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ (void) ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
#endif
@@ -849,8 +851,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* hash is already computed for encrypted casefolded directory */
if (fname && fname_name(fname) &&
- !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
- ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
+ !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) {
+ int ret = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), hinfo);
+ if (ret < 0) {
+ ret_err = ERR_PTR(ret);
+ goto fail;
+ }
+ }
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -1111,7 +1119,12 @@ static int htree_dirblock_to_tree(struct file *dir_file,
hinfo->minor_hash = 0;
}
} else {
- ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
+ err = ext4fs_dirhash(dir, de->name,
+ de->name_len, hinfo);
+ if (err < 0) {
+ count = err;
+ goto errout;
+ }
}
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
@@ -1313,8 +1326,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
if (de->name_len && de->inode) {
if (ext4_hash_in_dirent(dir))
h.hash = EXT4_DIRENT_HASH(de);
- else
- ext4fs_dirhash(dir, de->name, de->name_len, &h);
+ else {
+ int err = ext4fs_dirhash(dir, de->name,
+ de->name_len, &h);
+ if (err < 0)
+ return err;
+ }
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1452,10 +1469,9 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
hinfo->hash_version = DX_HASH_SIPHASH;
hinfo->seed = NULL;
if (cf_name->name)
- ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+ return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
else
- ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
- return 0;
+ return ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
}
#endif
@@ -2298,10 +2314,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
/* casefolded encrypted hashes are computed on fname setup */
- if (!ext4_hash_in_dirent(dir))
- ext4fs_dirhash(dir, fname_name(fname),
- fname_len(fname), &fname->hinfo);
-
+ if (!ext4_hash_in_dirent(dir)) {
+ int err = ext4fs_dirhash(dir, fname_name(fname),
+ fname_len(fname), &fname->hinfo);
+ if (err < 0) {
+ brelse(bh2);
+ brelse(bh);
+ return err;
+ }
+ }
memset(frames, 0, sizeof(frames));
frame = frames;
frame->entries = entries;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d39f386e9baf..05fcecc36244 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1048,6 +1048,8 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
int ret;
+ if (!grp || !gdp)
+ return;
if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&grp->bb_state);
@@ -3238,11 +3240,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -5684,8 +5684,9 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
descr = "out journal";
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
- ext4_msg(sb, KERN_INFO, "mounted filesystem %pU with%s. "
- "Quota mode: %s.", &sb->s_uuid, descr,
+ ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
+ "Quota mode: %s.", &sb->s_uuid,
+ sb_rdonly(sb) ? "ro" : "r/w", descr,
ext4_quota_mode(sb));
/* Update the s_overhead_clusters if necessary */
@@ -6587,18 +6588,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
/*
- * Reinitialize lazy itable initialization thread based on
- * current settings
- */
- if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
- ext4_unregister_li_request(sb);
- else {
- ext4_group_t first_not_zeroed;
- first_not_zeroed = ext4_has_uninit_itable(sb);
- ext4_register_li_request(sb, first_not_zeroed);
- }
-
- /*
* Handle creation of system zone data early because it can fail.
* Releasing of existing data is done when we are sure remount will
* succeed.
@@ -6616,9 +6605,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -6628,16 +6614,38 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
ext4_stop_mmpd(sbi);
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6678,8 +6686,9 @@ static int ext4_reconfigure(struct fs_context *fc)
if (ret < 0)
return ret;
- ext4_msg(sb, KERN_INFO, "re-mounted %pU. Quota mode: %s.",
- &sb->s_uuid, ext4_quota_mode(sb));
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
+ &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
+ ext4_quota_mode(sb));
return 0;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index dadad29bd81b..321e3a888c20 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -121,7 +121,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
+ struct ext4_inode_info *ei = EXT4_I(ea_inode);
+
lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
}
#endif
@@ -433,7 +437,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
return -EFSCORRUPTED;
}
- inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(parent->i_sb,
@@ -441,23 +445,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
err);
return err;
}
-
- if (is_bad_inode(inode)) {
- ext4_error(parent->i_sb,
- "error while reading EA inode %lu is_bad_inode",
- ea_ino);
- err = -EIO;
- goto error;
- }
-
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
- ext4_error(parent->i_sb,
- "EA inode %lu does not have EXT4_EA_INODE_FL flag",
- ea_ino);
- err = -EINVAL;
- goto error;
- }
-
ext4_xattr_inode_set_class(inode);
/*
@@ -478,9 +465,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
*ea_inode = inode;
return 0;
-error:
- iput(inode);
- return err;
}
/* Remove entry from mbcache when EA inode is getting evicted */
@@ -1556,11 +1540,11 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
while (ce) {
ea_inode = ext4_iget(inode->i_sb, ce->e_value,
- EXT4_IGET_NORMAL);
- if (!IS_ERR(ea_inode) &&
- !is_bad_inode(ea_inode) &&
- (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
- i_size_read(ea_inode) == value_len &&
+ EXT4_IGET_EA_INODE);
+ if (IS_ERR(ea_inode))
+ goto next_entry;
+ ext4_xattr_inode_set_class(ea_inode);
+ if (i_size_read(ea_inode) == value_len &&
!ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
!ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
value_len) &&
@@ -1570,9 +1554,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
kvfree(ea_data);
return ea_inode;
}
-
- if (!IS_ERR(ea_inode))
- iput(ea_inode);
+ iput(ea_inode);
+ next_entry:
ce = mb_cache_entry_find_next(ea_inode_cache, ce);
}
kvfree(ea_data);
@@ -2073,8 +2056,9 @@ inserted:
else {
u32 ref;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -2137,8 +2121,9 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
block = ext4_new_meta_blocks(handle, inode, goal, 0,
@@ -2614,6 +2599,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
.in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int needs_kvfree = 0;
int error;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
@@ -2636,7 +2622,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
error = -ENOMEM;
goto out;
}
-
+ needs_kvfree = 1;
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
@@ -2675,7 +2661,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
out:
kfree(b_entry_name);
- if (entry->e_value_inum && buffer)
+ if (needs_kvfree && buffer)
kvfree(buffer);
if (is)
brelse(is->iloc.bh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 300844f50dcd..cb62c8f07d1e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -784,9 +784,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
if (!user_backed_iter(i))
return false;
+ /*
+ * Try to fault in multiple pages initially. When that doesn't result
+ * in any progress, fall back to a single page.
+ */
size = PAGE_SIZE;
offs = offset_in_page(iocb->ki_pos);
- if (*prev_count != count || !*window_size) {
+ if (*prev_count != count) {
size_t nr_dirtied;
nr_dirtied = max(current->nr_dirtied_pause -
@@ -870,6 +874,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
struct gfs2_inode *ip = GFS2_I(inode);
size_t prev_count = 0, window_size = 0;
size_t written = 0;
+ bool enough_retries;
ssize_t ret;
/*
@@ -913,11 +918,17 @@ retry:
if (ret > 0)
written = ret;
+ enough_retries = prev_count == iov_iter_count(from) &&
+ window_size <= PAGE_SIZE;
if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
gfs2_glock_dq(gh);
window_size -= fault_in_iov_iter_readable(from, window_size);
- if (window_size)
- goto retry;
+ if (window_size) {
+ if (!enough_retries)
+ goto retry;
+ /* fall back to buffered I/O */
+ ret = 0;
+ }
}
out_unlock:
if (gfs2_holder_queued(gh))
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5eed8c237500..a84bf6444bba 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1419,6 +1419,14 @@ static void gfs2_evict_inode(struct inode *inode)
if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
goto out;
+ /*
+ * In case of an incomplete mount, gfs2_evict_inode() may be called for
+ * system files without having an active journal to write to. In that
+ * case, skip the filesystem evict.
+ */
+ if (!sdp->sd_jdesc)
+ goto out;
+
gfs2_holder_mark_uninitialized(&gh);
ret = evict_should_delete(inode, &gh);
if (ret == SHOULD_DEFER_EVICTION)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index bb94949bc223..04ba95b83d16 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -77,9 +77,9 @@ static const unsigned long nlm_grace_period_min = 0;
static const unsigned long nlm_grace_period_max = 240;
static const unsigned long nlm_timeout_min = 3;
static const unsigned long nlm_timeout_max = 20;
-static const int nlm_port_min = 0, nlm_port_max = 65535;
#ifdef CONFIG_SYSCTL
+static const int nlm_port_min = 0, nlm_port_max = 65535;
static struct ctl_table_header * nlm_sysctl_table;
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bacad0c57810..8f3112e71a6a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -317,7 +317,7 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
name = nfs_readdir_copy_name(entry->name, entry->len);
- array = kmap_atomic(folio_page(folio, 0));
+ array = kmap_local_folio(folio, 0);
if (!name)
goto out;
ret = nfs_readdir_array_can_expand(array);
@@ -340,7 +340,7 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
nfs_readdir_array_set_eof(array);
out:
*cookie = array->last_cookie;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
@@ -402,7 +402,7 @@ static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping,
struct folio *folio;
folio = filemap_grab_folio(mapping, index);
- if (!folio)
+ if (IS_ERR(folio))
return NULL;
nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
return folio;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 18f25ff4bff7..d3665390c4cb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5437,10 +5437,18 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
return false;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr)
{
- if (hdr->res.scratch)
+ if (hdr->res.scratch) {
kfree(hdr->res.scratch);
+ hdr->res.scratch = NULL;
+ }
+}
+
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+{
+ nfs4_read_plus_scratch_free(hdr);
+
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7b8f17ee5224..b4fd7a7062d5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -153,18 +153,6 @@ static int exports_net_open(struct net *net, struct file *file)
return 0;
}
-static int exports_proc_open(struct inode *inode, struct file *file)
-{
- return exports_net_open(current->nsproxy->net_ns, file);
-}
-
-static const struct proc_ops exports_proc_ops = {
- .proc_open = exports_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = seq_release,
-};
-
static int exports_nfsd_open(struct inode *inode, struct file *file)
{
return exports_net_open(inode->i_sb->s_fs_info, file);
@@ -702,16 +690,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
if (err != 0 || fd < 0)
return -EINVAL;
- if (svc_alien_sock(net, fd)) {
- printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
- return -EINVAL;
- }
-
err = nfsd_create_serv(net);
if (err != 0)
return err;
- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+ err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
if (err >= 0 &&
!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
@@ -1458,6 +1441,19 @@ static struct file_system_type nfsd_fs_type = {
MODULE_ALIAS_FS("nfsd");
#ifdef CONFIG_PROC_FS
+
+static int exports_proc_open(struct inode *inode, struct file *file)
+{
+ return exports_net_open(current->nsproxy->net_ns, file);
+}
+
+static const struct proc_ops exports_proc_ops = {
+ .proc_open = exports_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
+};
+
static int create_proc_exports_entry(void)
{
struct proc_dir_entry *entry;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 4183819ea082..72a906a053dc 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1365,19 +1365,19 @@ TRACE_EVENT(nfsd_cb_setup,
__field(u32, cl_id)
__field(unsigned long, authflavor)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
- __array(unsigned char, netid, 8)
+ __string(netid, netid)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
- strlcpy(__entry->netid, netid, sizeof(__entry->netid));
+ __assign_str(netid, netid);
__entry->authflavor = authflavor;
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
- __entry->netid, show_nfsd_authflavor(__entry->authflavor))
+ __get_str(netid), show_nfsd_authflavor(__entry->authflavor))
);
TRACE_EVENT(nfsd_cb_setup_err,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bb9d47172162..db67f8e19344 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -536,7 +536,15 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock(inode);
for (retries = 1;;) {
- host_err = __nfsd_setattr(dentry, iap);
+ struct iattr attrs;
+
+ /*
+ * notify_change() can alter its iattr argument, making
+ * @iap unsuitable for submission multiple times. Make a
+ * copy for every loop iteration.
+ */
+ attrs = *iap;
+ host_err = __nfsd_setattr(dentry, &attrs);
if (host_err != -EAGAIN || !retries--)
break;
if (!nfsd_wait_for_delegreturn(rqstp, inode))
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 1310d2d5feb3..a8ce522ac747 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -917,6 +917,7 @@ void nilfs_evict_inode(struct inode *inode)
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
@@ -929,6 +930,23 @@ void nilfs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
+
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
nilfs_mark_inode_dirty(inode);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 49cfe2ae6d23..993375f0db67 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -65,7 +65,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
struct fsnotify_event *fsn_event;
struct fsnotify_group *group = inode_mark->group;
int ret;
- int len = 0;
+ int len = 0, wd;
int alloc_len = sizeof(struct inotify_event_info);
struct mem_cgroup *old_memcg;
@@ -81,6 +81,13 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_mark);
/*
+ * We can be racing with mark being detached. Don't report event with
+ * invalid wd.
+ */
+ wd = READ_ONCE(i_mark->wd);
+ if (wd == -1)
+ return 0;
+ /*
* Whoever is interested in the event, pays for the allocation. Do not
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
@@ -110,7 +117,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
fsn_event = &event->fse;
fsnotify_init_event(fsn_event);
event->mask = mask;
- event->wd = i_mark->wd;
+ event->wd = wd;
event->sync_cookie = cookie;
event->name_len = len;
if (len)
diff --git a/fs/pipe.c b/fs/pipe.c
index ceb17d2dfa19..2d88f73f585a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -342,7 +342,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if ((filp->f_flags & O_NONBLOCK) ||
+ (iocb->ki_flags & IOCB_NOWAIT)) {
ret = -EAGAIN;
break;
}
@@ -547,7 +548,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
continue;
/* Wait for buffer space to become available. */
- if (filp->f_flags & O_NONBLOCK) {
+ if ((filp->f_flags & O_NONBLOCK) ||
+ (iocb->ki_flags & IOCB_NOWAIT)) {
if (!ret)
ret = -EAGAIN;
break;
diff --git a/fs/smb/Kconfig b/fs/smb/Kconfig
new file mode 100644
index 000000000000..ef425789fa6a
--- /dev/null
+++ b/fs/smb/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# smbfs configuration
+
+source "fs/smb/client/Kconfig"
+source "fs/smb/server/Kconfig"
+
+config SMBFS
+ tristate
+ default y if CIFS=y || SMB_SERVER=y
+ default m if CIFS=m || SMB_SERVER=m
diff --git a/fs/smb/Makefile b/fs/smb/Makefile
new file mode 100644
index 000000000000..9a1bf59a1a65
--- /dev/null
+++ b/fs/smb/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SMBFS) += common/
+obj-$(CONFIG_CIFS) += client/
+obj-$(CONFIG_SMB_SERVER) += server/
diff --git a/fs/cifs/Kconfig b/fs/smb/client/Kconfig
index 4c0d53bf931a..4c0d53bf931a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/smb/client/Kconfig
diff --git a/fs/cifs/Makefile b/fs/smb/client/Makefile
index 304a7f6cc13a..304a7f6cc13a 100644
--- a/fs/cifs/Makefile
+++ b/fs/smb/client/Makefile
diff --git a/fs/cifs/asn1.c b/fs/smb/client/asn1.c
index b5724ef9f182..b5724ef9f182 100644
--- a/fs/cifs/asn1.c
+++ b/fs/smb/client/asn1.c
diff --git a/fs/cifs/cached_dir.c b/fs/smb/client/cached_dir.c
index bfc964b36c72..bfc964b36c72 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
diff --git a/fs/cifs/cached_dir.h b/fs/smb/client/cached_dir.h
index 2f4e764c9ca9..2f4e764c9ca9 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
diff --git a/fs/cifs/cifs_debug.c b/fs/smb/client/cifs_debug.c
index d4ed200a9471..5034b862cec2 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -108,7 +108,7 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
if ((tcon->seal) ||
(tcon->ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) ||
(tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA))
- seq_printf(m, " Encrypted");
+ seq_puts(m, " encrypted");
if (tcon->nocase)
seq_printf(m, " nocase");
if (tcon->unix_ext)
@@ -415,8 +415,12 @@ skip_rdma:
/* dump session id helpful for use with network trace */
seq_printf(m, " SessionId: 0x%llx", ses->Suid);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) {
seq_puts(m, " encrypted");
+ /* can help in debugging to show encryption type */
+ if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ seq_puts(m, "(gcm256)");
+ }
if (ses->sign)
seq_puts(m, " signed");
diff --git a/fs/cifs/cifs_debug.h b/fs/smb/client/cifs_debug.h
index ce5cfd236fdb..ce5cfd236fdb 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/smb/client/cifs_debug.h
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/smb/client/cifs_dfs_ref.c
index 0329a907bdfe..0329a907bdfe 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/smb/client/cifs_dfs_ref.c
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h
index 651759192280..651759192280 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/smb/client/cifs_fs_sb.h
diff --git a/fs/cifs/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h
index 332588e77c31..332588e77c31 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/smb/client/cifs_ioctl.h
diff --git a/fs/cifs/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index 6f3285f1dfee..6f3285f1dfee 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
diff --git a/fs/cifs/cifs_spnego.h b/fs/smb/client/cifs_spnego.h
index e4d751b0c812..e4d751b0c812 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/smb/client/cifs_spnego.h
diff --git a/fs/cifs/cifs_spnego_negtokeninit.asn1 b/fs/smb/client/cifs_spnego_negtokeninit.asn1
index 181c083887d5..181c083887d5 100644
--- a/fs/cifs/cifs_spnego_negtokeninit.asn1
+++ b/fs/smb/client/cifs_spnego_negtokeninit.asn1
diff --git a/fs/cifs/cifs_swn.c b/fs/smb/client/cifs_swn.c
index 7233c6a7e6d7..7233c6a7e6d7 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/smb/client/cifs_swn.c
diff --git a/fs/cifs/cifs_swn.h b/fs/smb/client/cifs_swn.h
index 8a9d2a5c9077..8a9d2a5c9077 100644
--- a/fs/cifs/cifs_swn.h
+++ b/fs/smb/client/cifs_swn.h
diff --git a/fs/cifs/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index e7582dd79179..e7582dd79179 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
diff --git a/fs/cifs/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index 80b3d845419f..80b3d845419f 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
diff --git a/fs/cifs/cifs_uniupr.h b/fs/smb/client/cifs_uniupr.h
index 7b272fcdf0d3..7b272fcdf0d3 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/smb/client/cifs_uniupr.h
diff --git a/fs/cifs/cifsacl.c b/fs/smb/client/cifsacl.c
index f5b6df82e857..f5b6df82e857 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
diff --git a/fs/cifs/cifsacl.h b/fs/smb/client/cifsacl.h
index ccbfc754bd3c..ccbfc754bd3c 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/smb/client/cifsacl.h
diff --git a/fs/cifs/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 357bd27a7fd1..ef4c2e3c9fa6 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -21,7 +21,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/fips.h>
-#include "../smbfs_common/arc4.h"
+#include "../common/arc4.h"
#include <crypto/aead.h>
/*
diff --git a/fs/cifs/cifsfs.c b/fs/smb/client/cifsfs.c
index 32f7c81a7b89..43a4d8603db3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -246,7 +246,7 @@ cifs_read_super(struct super_block *sb)
if (cifs_sb->ctx->rasize)
sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
else
- sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
+ sb->s_bdi->ra_pages = 2 * (cifs_sb->ctx->rsize / PAGE_SIZE);
sb->s_blocksize = CIFS_MAX_MSGSIZE;
sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
@@ -744,6 +744,7 @@ static void cifs_umount_begin(struct super_block *sb)
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
+ cifs_close_all_deferred_files(tcon);
/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
/* cancel_notify_requests(tcon); */
if (tcon->ses && tcon->ses->server) {
@@ -759,6 +760,20 @@ static void cifs_umount_begin(struct super_block *sb)
return;
}
+static int cifs_freeze(struct super_block *sb)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon;
+
+ if (cifs_sb == NULL)
+ return 0;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+
+ cifs_close_all_deferred_files(tcon);
+ return 0;
+}
+
#ifdef CONFIG_CIFS_STATS2
static int cifs_show_stats(struct seq_file *s, struct dentry *root)
{
@@ -797,6 +812,7 @@ static const struct super_operations cifs_super_ops = {
as opens */
.show_options = cifs_show_options,
.umount_begin = cifs_umount_begin,
+ .freeze_fs = cifs_freeze,
#ifdef CONFIG_CIFS_STATS2
.show_stats = cifs_show_stats,
#endif
diff --git a/fs/cifs/cifsfs.h b/fs/smb/client/cifsfs.h
index 74cd6fafb33e..74cd6fafb33e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
diff --git a/fs/cifs/cifsglob.h b/fs/smb/client/cifsglob.h
index 414685c5d530..0d84bb1a8cd9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -24,7 +24,7 @@
#include "cifsacl.h"
#include <crypto/internal/hash.h>
#include <uapi/linux/cifs/cifs_mount.h>
-#include "../smbfs_common/smb2pdu.h"
+#include "../common/smb2pdu.h"
#include "smb2pdu.h"
#include <linux/filelock.h>
@@ -424,8 +424,8 @@ struct smb_version_operations {
/* check for STATUS_NETWORK_SESSION_EXPIRED */
bool (*is_session_expired)(char *);
/* send oplock break response */
- int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
- struct cifsInodeInfo *);
+ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid,
+ __u16 net_fid, struct cifsInodeInfo *cifs_inode);
/* query remote filesystem */
int (*queryfs)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, struct kstatfs *);
diff --git a/fs/cifs/cifspdu.h b/fs/smb/client/cifspdu.h
index 445e3eaebcc1..e17222fec9d2 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -11,7 +11,7 @@
#include <net/sock.h>
#include <asm/unaligned.h>
-#include "../smbfs_common/smbfsctl.h"
+#include "../common/smbfsctl.h"
#define CIFS_PROT 0
#define POSIX_PROT (CIFS_PROT+1)
diff --git a/fs/cifs/cifsproto.h b/fs/smb/client/cifsproto.h
index c1c704990b98..c1c704990b98 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
diff --git a/fs/cifs/cifsroot.c b/fs/smb/client/cifsroot.c
index 56ec1b233f52..56ec1b233f52 100644
--- a/fs/cifs/cifsroot.c
+++ b/fs/smb/client/cifsroot.c
diff --git a/fs/cifs/cifssmb.c b/fs/smb/client/cifssmb.c
index 9d963caec35c..9d963caec35c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
diff --git a/fs/cifs/connect.c b/fs/smb/client/connect.c
index eeeed6fda13b..8e9a672320ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/smb/client/connect.c
@@ -2709,6 +2709,13 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
+
+ /* We do not want to use a superblock that has been shutdown */
+ if (CIFS_MOUNT_SHUTDOWN & cifs_sb->mnt_cifs_flags) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
if (tlink == NULL) {
/* can not match superblock if tlink were ever null */
diff --git a/fs/cifs/dfs.c b/fs/smb/client/dfs.c
index a93dbca1411b..2f93bf8c3325 100644
--- a/fs/cifs/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -303,7 +303,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
if (!nodfs) {
rc = dfs_get_referral(mnt_ctx, ctx->UNC + 1, NULL, NULL);
if (rc) {
- if (rc != -ENOENT && rc != -EOPNOTSUPP)
+ if (rc != -ENOENT && rc != -EOPNOTSUPP && rc != -EIO)
goto out;
nodfs = true;
}
diff --git a/fs/cifs/dfs.h b/fs/smb/client/dfs.h
index 1c90df5ecfbd..1c90df5ecfbd 100644
--- a/fs/cifs/dfs.h
+++ b/fs/smb/client/dfs.h
diff --git a/fs/cifs/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 1513b2709889..1513b2709889 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
diff --git a/fs/cifs/dfs_cache.h b/fs/smb/client/dfs_cache.h
index c6d89cd6d4fd..c6d89cd6d4fd 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/smb/client/dfs_cache.h
diff --git a/fs/cifs/dir.c b/fs/smb/client/dir.c
index 30b1e1bfd204..30b1e1bfd204 100644
--- a/fs/cifs/dir.c
+++ b/fs/smb/client/dir.c
diff --git a/fs/cifs/dns_resolve.c b/fs/smb/client/dns_resolve.c
index 8bf8978bc5d6..8bf8978bc5d6 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/smb/client/dns_resolve.c
diff --git a/fs/cifs/dns_resolve.h b/fs/smb/client/dns_resolve.h
index 6eb0c15a2440..6eb0c15a2440 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/smb/client/dns_resolve.h
diff --git a/fs/cifs/export.c b/fs/smb/client/export.c
index 37c28415df1e..37c28415df1e 100644
--- a/fs/cifs/export.c
+++ b/fs/smb/client/export.c
diff --git a/fs/cifs/file.c b/fs/smb/client/file.c
index c5fcefdfd797..df88b8c04d03 100644
--- a/fs/cifs/file.c
+++ b/fs/smb/client/file.c
@@ -3353,9 +3353,10 @@ static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_siz
while (n && ix < nbv) {
len = min3(n, bvecs[ix].bv_len - skip, max_size);
span += len;
+ max_size -= len;
nsegs++;
ix++;
- if (span >= max_size || nsegs >= max_segs)
+ if (max_size == 0 || nsegs >= max_segs)
break;
skip = 0;
n -= len;
@@ -4881,9 +4882,9 @@ void cifs_oplock_break(struct work_struct *work)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
- bool purge_cache = false;
- struct cifs_deferred_close *dclose;
- bool is_deferred = false;
+ bool purge_cache = false, oplock_break_cancelled;
+ __u64 persistent_fid, volatile_fid;
+ __u16 net_fid;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -4924,28 +4925,28 @@ oplock_break_ack:
* file handles but cached, then schedule deferred close immediately.
* So, new open will not use cached handle.
*/
- spin_lock(&CIFS_I(inode)->deferred_lock);
- is_deferred = cifs_is_deferred_close(cfile, &dclose);
- spin_unlock(&CIFS_I(inode)->deferred_lock);
- if (!CIFS_CACHE_HANDLE(cinode) && is_deferred &&
- cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) {
+ if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
cifs_close_deferred_file(cinode);
- }
+ persistent_fid = cfile->fid.persistent_fid;
+ volatile_fid = cfile->fid.volatile_fid;
+ net_fid = cfile->fid.netfid;
+ oplock_break_cancelled = cfile->oplock_break_cancelled;
+
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
/*
* releasing stale oplock after recent reconnect of smb session using
* a now incorrect file handle is not a data integrity issue but do
* not bother sending an oplock release if session to server still is
* disconnected since oplock already released by the server
*/
- if (!cfile->oplock_break_cancelled) {
- rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
- cinode);
+ if (!oplock_break_cancelled) {
+ rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid,
+ volatile_fid, net_fid, cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
- _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/fs_context.c b/fs/smb/client/fs_context.c
index ace11a1a7c8a..1bda75609b64 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -904,6 +904,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->sfu_remap = false; /* disable SFU mapping */
}
break;
+ case Opt_mapchars:
+ if (result.negated)
+ ctx->sfu_remap = false;
+ else {
+ ctx->sfu_remap = true;
+ ctx->remap = false; /* disable SFM (mapposix) mapping */
+ }
+ break;
case Opt_user_xattr:
if (result.negated)
ctx->no_xattr = 1;
diff --git a/fs/cifs/fs_context.h b/fs/smb/client/fs_context.h
index f4eaf8558902..f4eaf8558902 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/smb/client/fs_context.h
diff --git a/fs/cifs/fscache.c b/fs/smb/client/fscache.c
index 8f6909d633da..8f6909d633da 100644
--- a/fs/cifs/fscache.c
+++ b/fs/smb/client/fscache.c
diff --git a/fs/cifs/fscache.h b/fs/smb/client/fscache.h
index 173999610997..173999610997 100644
--- a/fs/cifs/fscache.h
+++ b/fs/smb/client/fscache.h
diff --git a/fs/cifs/inode.c b/fs/smb/client/inode.c
index 1087ac6104a9..1087ac6104a9 100644
--- a/fs/cifs/inode.c
+++ b/fs/smb/client/inode.c
diff --git a/fs/cifs/ioctl.c b/fs/smb/client/ioctl.c
index cb3be58cd55e..fff092bbc7a3 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -321,7 +321,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
struct tcon_link *tlink;
struct cifs_sb_info *cifs_sb;
__u64 ExtAttrBits = 0;
+#ifdef CONFIG_CIFS_POSIX
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
__u64 caps;
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+#endif /* CONFIG_CIFS_POSIX */
xid = get_xid();
@@ -331,9 +335,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
- caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
#ifdef CONFIG_CIFS_POSIX
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
__u64 ExtAttrMask = 0;
rc = CIFSGetExtAttr(xid, tcon,
diff --git a/fs/cifs/link.c b/fs/smb/client/link.c
index c66be4904e1f..c66be4904e1f 100644
--- a/fs/cifs/link.c
+++ b/fs/smb/client/link.c
diff --git a/fs/cifs/misc.c b/fs/smb/client/misc.c
index cd914be905b2..cd914be905b2 100644
--- a/fs/cifs/misc.c
+++ b/fs/smb/client/misc.c
diff --git a/fs/cifs/netlink.c b/fs/smb/client/netlink.c
index 147d9409252c..147d9409252c 100644
--- a/fs/cifs/netlink.c
+++ b/fs/smb/client/netlink.c
diff --git a/fs/cifs/netlink.h b/fs/smb/client/netlink.h
index e2fa8ed24c54..e2fa8ed24c54 100644
--- a/fs/cifs/netlink.h
+++ b/fs/smb/client/netlink.h
diff --git a/fs/cifs/netmisc.c b/fs/smb/client/netmisc.c
index 1b52e6ac431c..1b52e6ac431c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/smb/client/netmisc.c
diff --git a/fs/cifs/nterr.c b/fs/smb/client/nterr.c
index 358a766375b4..358a766375b4 100644
--- a/fs/cifs/nterr.c
+++ b/fs/smb/client/nterr.c
diff --git a/fs/cifs/nterr.h b/fs/smb/client/nterr.h
index edd4741cab0a..edd4741cab0a 100644
--- a/fs/cifs/nterr.h
+++ b/fs/smb/client/nterr.h
diff --git a/fs/cifs/ntlmssp.h b/fs/smb/client/ntlmssp.h
index 2c5dde2ece58..2c5dde2ece58 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/smb/client/ntlmssp.h
diff --git a/fs/cifs/readdir.c b/fs/smb/client/readdir.c
index ef638086d734..ef638086d734 100644
--- a/fs/cifs/readdir.c
+++ b/fs/smb/client/readdir.c
diff --git a/fs/cifs/rfc1002pdu.h b/fs/smb/client/rfc1002pdu.h
index ae1d025da294..ae1d025da294 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/smb/client/rfc1002pdu.h
diff --git a/fs/cifs/sess.c b/fs/smb/client/sess.c
index 335c078c42fb..335c078c42fb 100644
--- a/fs/cifs/sess.c
+++ b/fs/smb/client/sess.c
diff --git a/fs/cifs/smb1ops.c b/fs/smb/client/smb1ops.c
index abda6148be10..7d1b3fc014d9 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -897,12 +897,11 @@ cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
-cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
- struct cifsInodeInfo *cinode)
+cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
+ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
{
- return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
- LOCKING_ANDX_OPLOCK_RELEASE, false,
- CIFS_CACHE_READ(cinode) ? 1 : 0);
+ return CIFSSMBLock(0, tcon, net_fid, current->tgid, 0, 0, 0, 0,
+ LOCKING_ANDX_OPLOCK_RELEASE, false, CIFS_CACHE_READ(cinode) ? 1 : 0);
}
static int
diff --git a/fs/cifs/smb2file.c b/fs/smb/client/smb2file.c
index e0ee96d69d49..e0ee96d69d49 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/smb/client/smb2file.c
diff --git a/fs/cifs/smb2glob.h b/fs/smb/client/smb2glob.h
index 82e916ad167c..82e916ad167c 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/smb/client/smb2glob.h
diff --git a/fs/cifs/smb2inode.c b/fs/smb/client/smb2inode.c
index 163a03298430..163a03298430 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
diff --git a/fs/cifs/smb2maperror.c b/fs/smb/client/smb2maperror.c
index 194799ddd382..194799ddd382 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/smb/client/smb2maperror.c
diff --git a/fs/cifs/smb2misc.c b/fs/smb/client/smb2misc.c
index 3935a60db5c3..3935a60db5c3 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
diff --git a/fs/cifs/smb2ops.c b/fs/smb/client/smb2ops.c
index a81758225fcd..6e3be58cfe49 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -618,7 +618,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
* Add a new one instead
*/
spin_lock(&ses->iface_lock);
- iface = niface = NULL;
list_for_each_entry_safe(iface, niface, &ses->iface_list,
iface_head) {
ret = iface_cmp(iface, &tmp_iface);
@@ -1682,7 +1681,7 @@ smb2_copychunk_range(const unsigned int xid,
pcchunk->SourceOffset = cpu_to_le64(src_off);
pcchunk->TargetOffset = cpu_to_le64(dest_off);
pcchunk->Length =
- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
+ cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
kfree(retbuf);
@@ -2383,15 +2382,14 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
}
static int
-smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
- struct cifsInodeInfo *cinode)
+smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
+ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
{
if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
return SMB2_lease_break(0, tcon, cinode->lease_key,
smb2_get_lease_state(cinode));
- return SMB2_oplock_break(0, tcon, fid->persistent_fid,
- fid->volatile_fid,
+ return SMB2_oplock_break(0, tcon, persistent_fid, volatile_fid,
CIFS_CACHE_READ(cinode) ? 1 : 0);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/smb/client/smb2pdu.c
index e33ca0d33906..7063b395d22f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1947,6 +1947,9 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
init_copy_chunk_defaults(tcon);
if (server->ops->validate_negotiate)
rc = server->ops->validate_negotiate(xid, tcon);
+ if (rc == 0) /* See MS-SMB2 2.2.10 and 3.2.5.5 */
+ if (tcon->share_flags & SMB2_SHAREFLAG_ISOLATED_TRANSPORT)
+ server->nosharesock = true;
tcon_exit:
free_rsp_buf(resp_buftype, rsp);
@@ -3722,7 +3725,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
if (*out_data == NULL) {
rc = -ENOMEM;
goto cnotify_exit;
- } else
+ } else if (plen)
*plen = le32_to_cpu(smb_rsp->OutputBufferLength);
}
diff --git a/fs/cifs/smb2pdu.h b/fs/smb/client/smb2pdu.h
index 220994d0a0f7..220994d0a0f7 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
diff --git a/fs/cifs/smb2proto.h b/fs/smb/client/smb2proto.h
index d5d7ffb7711c..d5d7ffb7711c 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
diff --git a/fs/cifs/smb2status.h b/fs/smb/client/smb2status.h
index a9e958166fc5..a9e958166fc5 100644
--- a/fs/cifs/smb2status.h
+++ b/fs/smb/client/smb2status.h
diff --git a/fs/cifs/smb2transport.c b/fs/smb/client/smb2transport.c
index 790acf65a092..790acf65a092 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
diff --git a/fs/cifs/smbdirect.c b/fs/smb/client/smbdirect.c
index 0362ebd4fa0f..0362ebd4fa0f 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
diff --git a/fs/cifs/smbdirect.h b/fs/smb/client/smbdirect.h
index 83f239f376f0..83f239f376f0 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
diff --git a/fs/cifs/smbencrypt.c b/fs/smb/client/smbencrypt.c
index 4a0487753869..f0ce26414f17 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/smb/client/smbencrypt.c
@@ -24,7 +24,7 @@
#include "cifsglob.h"
#include "cifs_debug.h"
#include "cifsproto.h"
-#include "../smbfs_common/md4.h"
+#include "../common/md4.h"
#ifndef false
#define false 0
diff --git a/fs/cifs/smberr.h b/fs/smb/client/smberr.h
index aeffdad829e2..aeffdad829e2 100644
--- a/fs/cifs/smberr.h
+++ b/fs/smb/client/smberr.h
diff --git a/fs/cifs/trace.c b/fs/smb/client/trace.c
index 465483787193..465483787193 100644
--- a/fs/cifs/trace.c
+++ b/fs/smb/client/trace.c
diff --git a/fs/cifs/trace.h b/fs/smb/client/trace.h
index d3053bd8ae73..d3053bd8ae73 100644
--- a/fs/cifs/trace.h
+++ b/fs/smb/client/trace.h
diff --git a/fs/cifs/transport.c b/fs/smb/client/transport.c
index 24bdd5f4d3bc..24bdd5f4d3bc 100644
--- a/fs/cifs/transport.c
+++ b/fs/smb/client/transport.c
diff --git a/fs/cifs/unc.c b/fs/smb/client/unc.c
index f6fc5e343ea4..f6fc5e343ea4 100644
--- a/fs/cifs/unc.c
+++ b/fs/smb/client/unc.c
diff --git a/fs/cifs/winucase.c b/fs/smb/client/winucase.c
index 2f075b5b50df..2f075b5b50df 100644
--- a/fs/cifs/winucase.c
+++ b/fs/smb/client/winucase.c
diff --git a/fs/cifs/xattr.c b/fs/smb/client/xattr.c
index 4ad5531686d8..4ad5531686d8 100644
--- a/fs/cifs/xattr.c
+++ b/fs/smb/client/xattr.c
diff --git a/fs/smbfs_common/Makefile b/fs/smb/common/Makefile
index cafc61a3bfc3..c66dbbc1469c 100644
--- a/fs/smbfs_common/Makefile
+++ b/fs/smb/common/Makefile
@@ -3,5 +3,5 @@
# Makefile for Linux filesystem routines that are shared by client and server.
#
-obj-$(CONFIG_SMBFS_COMMON) += cifs_arc4.o
-obj-$(CONFIG_SMBFS_COMMON) += cifs_md4.o
+obj-$(CONFIG_SMBFS) += cifs_arc4.o
+obj-$(CONFIG_SMBFS) += cifs_md4.o
diff --git a/fs/smbfs_common/arc4.h b/fs/smb/common/arc4.h
index 12e71ec033a1..12e71ec033a1 100644
--- a/fs/smbfs_common/arc4.h
+++ b/fs/smb/common/arc4.h
diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smb/common/cifs_arc4.c
index 043e4cb839fa..043e4cb839fa 100644
--- a/fs/smbfs_common/cifs_arc4.c
+++ b/fs/smb/common/cifs_arc4.c
diff --git a/fs/smbfs_common/cifs_md4.c b/fs/smb/common/cifs_md4.c
index 50f78cfc6ce9..50f78cfc6ce9 100644
--- a/fs/smbfs_common/cifs_md4.c
+++ b/fs/smb/common/cifs_md4.c
diff --git a/fs/smbfs_common/md4.h b/fs/smb/common/md4.h
index 5337becc699a..5337becc699a 100644
--- a/fs/smbfs_common/md4.h
+++ b/fs/smb/common/md4.h
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index bae590eec871..bae590eec871 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smb/common/smbfsctl.h
index edd7fc2a7921..edd7fc2a7921 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smb/common/smbfsctl.h
diff --git a/fs/ksmbd/Kconfig b/fs/smb/server/Kconfig
index 7055cb5d2880..7055cb5d2880 100644
--- a/fs/ksmbd/Kconfig
+++ b/fs/smb/server/Kconfig
diff --git a/fs/ksmbd/Makefile b/fs/smb/server/Makefile
index 7d6337a7dee4..7d6337a7dee4 100644
--- a/fs/ksmbd/Makefile
+++ b/fs/smb/server/Makefile
diff --git a/fs/ksmbd/asn1.c b/fs/smb/server/asn1.c
index cc6384f79675..cc6384f79675 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/smb/server/asn1.c
diff --git a/fs/ksmbd/asn1.h b/fs/smb/server/asn1.h
index ce105f4ce305..ce105f4ce305 100644
--- a/fs/ksmbd/asn1.h
+++ b/fs/smb/server/asn1.h
diff --git a/fs/ksmbd/auth.c b/fs/smb/server/auth.c
index df8fb076f6f1..5e5e120edcc2 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/smb/server/auth.c
@@ -29,7 +29,7 @@
#include "mgmt/user_config.h"
#include "crypto_ctx.h"
#include "transport_ipc.h"
-#include "../smbfs_common/arc4.h"
+#include "../common/arc4.h"
/*
* Fixed format data defining GSS header and fixed string
diff --git a/fs/ksmbd/auth.h b/fs/smb/server/auth.h
index 362b6159a6cf..362b6159a6cf 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/smb/server/auth.h
diff --git a/fs/ksmbd/connection.c b/fs/smb/server/connection.c
index 4ed379f9b1aa..4882a812ea86 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/smb/server/connection.c
@@ -351,7 +351,8 @@ int ksmbd_conn_handler_loop(void *p)
break;
/* 4 for rfc1002 length field */
- size = pdu_size + 4;
+ /* 1 for implied bcc[0] */
+ size = pdu_size + 4 + 1;
conn->request_buf = kvmalloc(size, GFP_KERNEL);
if (!conn->request_buf)
break;
diff --git a/fs/ksmbd/connection.h b/fs/smb/server/connection.h
index ad8dfaa48ffb..ad8dfaa48ffb 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/smb/server/connection.h
diff --git a/fs/ksmbd/crypto_ctx.c b/fs/smb/server/crypto_ctx.c
index 81488d04199d..81488d04199d 100644
--- a/fs/ksmbd/crypto_ctx.c
+++ b/fs/smb/server/crypto_ctx.c
diff --git a/fs/ksmbd/crypto_ctx.h b/fs/smb/server/crypto_ctx.h
index 4a367c62f653..4a367c62f653 100644
--- a/fs/ksmbd/crypto_ctx.h
+++ b/fs/smb/server/crypto_ctx.h
diff --git a/fs/ksmbd/glob.h b/fs/smb/server/glob.h
index 5b8f3e0ebdb3..5b8f3e0ebdb3 100644
--- a/fs/ksmbd/glob.h
+++ b/fs/smb/server/glob.h
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index fb8b2d566efb..fb8b2d566efb 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
diff --git a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 b/fs/smb/server/ksmbd_spnego_negtokeninit.asn1
index 0065f191b54b..0065f191b54b 100644
--- a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
+++ b/fs/smb/server/ksmbd_spnego_negtokeninit.asn1
diff --git a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 b/fs/smb/server/ksmbd_spnego_negtokentarg.asn1
index 1151933e7b9c..1151933e7b9c 100644
--- a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
+++ b/fs/smb/server/ksmbd_spnego_negtokentarg.asn1
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 14b9caebf7a4..14b9caebf7a4 100644
--- a/fs/ksmbd/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/smb/server/ksmbd_work.h
index f8ae6144c0ae..f8ae6144c0ae 100644
--- a/fs/ksmbd/ksmbd_work.h
+++ b/fs/smb/server/ksmbd_work.h
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.c b/fs/smb/server/mgmt/ksmbd_ida.c
index 54194d959a5e..54194d959a5e 100644
--- a/fs/ksmbd/mgmt/ksmbd_ida.c
+++ b/fs/smb/server/mgmt/ksmbd_ida.c
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.h b/fs/smb/server/mgmt/ksmbd_ida.h
index 2bc07b16cfde..2bc07b16cfde 100644
--- a/fs/ksmbd/mgmt/ksmbd_ida.h
+++ b/fs/smb/server/mgmt/ksmbd_ida.h
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index 328a412259dc..328a412259dc 100644
--- a/fs/ksmbd/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h
index 3fd338293942..3fd338293942 100644
--- a/fs/ksmbd/mgmt/share_config.h
+++ b/fs/smb/server/mgmt/share_config.h
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c
index f07a05f37651..f07a05f37651 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/smb/server/mgmt/tree_connect.c
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h
index 700df36cf3e3..700df36cf3e3 100644
--- a/fs/ksmbd/mgmt/tree_connect.h
+++ b/fs/smb/server/mgmt/tree_connect.h
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/smb/server/mgmt/user_config.c
index 279d00feff21..279d00feff21 100644
--- a/fs/ksmbd/mgmt/user_config.c
+++ b/fs/smb/server/mgmt/user_config.c
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/smb/server/mgmt/user_config.h
index 6a44109617f1..6a44109617f1 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/smb/server/mgmt/user_config.h
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 8a5dcab05614..8a5dcab05614 100644
--- a/fs/ksmbd/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h
index f99d475b28db..f99d475b28db 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/smb/server/mgmt/user_session.h
diff --git a/fs/ksmbd/misc.c b/fs/smb/server/misc.c
index 9e8afaa686e3..9e8afaa686e3 100644
--- a/fs/ksmbd/misc.c
+++ b/fs/smb/server/misc.c
diff --git a/fs/ksmbd/misc.h b/fs/smb/server/misc.h
index 1facfcd21200..1facfcd21200 100644
--- a/fs/ksmbd/misc.h
+++ b/fs/smb/server/misc.h
diff --git a/fs/ksmbd/ndr.c b/fs/smb/server/ndr.c
index 3507d8f89074..3507d8f89074 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/smb/server/ndr.c
diff --git a/fs/ksmbd/ndr.h b/fs/smb/server/ndr.h
index f3c108c8cf4d..f3c108c8cf4d 100644
--- a/fs/ksmbd/ndr.h
+++ b/fs/smb/server/ndr.h
diff --git a/fs/ksmbd/nterr.h b/fs/smb/server/nterr.h
index 2f358f88a018..2f358f88a018 100644
--- a/fs/ksmbd/nterr.h
+++ b/fs/smb/server/nterr.h
diff --git a/fs/ksmbd/ntlmssp.h b/fs/smb/server/ntlmssp.h
index f13153c18b4e..f13153c18b4e 100644
--- a/fs/ksmbd/ntlmssp.h
+++ b/fs/smb/server/ntlmssp.h
diff --git a/fs/ksmbd/oplock.c b/fs/smb/server/oplock.c
index 2e54ded4d92c..db181bdad73a 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -157,13 +157,42 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
rcu_read_lock();
opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
op_entry);
- if (opinfo && !atomic_inc_not_zero(&opinfo->refcount))
- opinfo = NULL;
+ if (opinfo) {
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ opinfo = NULL;
+ else {
+ atomic_inc(&opinfo->conn->r_count);
+ if (ksmbd_conn_releasing(opinfo->conn)) {
+ atomic_dec(&opinfo->conn->r_count);
+ atomic_dec(&opinfo->refcount);
+ opinfo = NULL;
+ }
+ }
+ }
+
rcu_read_unlock();
return opinfo;
}
+static void opinfo_conn_put(struct oplock_info *opinfo)
+{
+ struct ksmbd_conn *conn;
+
+ if (!opinfo)
+ return;
+
+ conn = opinfo->conn;
+ /*
+ * Checking waitqueue to dropping pending requests on
+ * disconnection. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
+ wake_up(&conn->r_count_q);
+ opinfo_put(opinfo);
+}
+
void opinfo_put(struct oplock_info *opinfo)
{
if (!atomic_dec_and_test(&opinfo->refcount))
@@ -666,13 +695,6 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
}
/**
@@ -706,7 +728,6 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -776,13 +797,6 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
}
/**
@@ -822,7 +836,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
list_for_each_safe(tmp, t, &opinfo->interim_list) {
struct ksmbd_work *in_work;
@@ -1144,8 +1157,10 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
}
prev_opinfo = opinfo_get_list(ci);
if (!prev_opinfo ||
- (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx))
+ (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx)) {
+ opinfo_conn_put(prev_opinfo);
goto set_lev;
+ }
prev_op_has_lease = prev_opinfo->is_lease;
if (prev_op_has_lease)
prev_op_state = prev_opinfo->o_lease->state;
@@ -1153,19 +1168,19 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
if (share_ret < 0 &&
prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
err = share_ret;
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto err_out;
}
if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto op_break_not_needed;
}
list_add(&work->interim_entry, &prev_opinfo->interim_list);
err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
if (err == -ENOENT)
goto set_lev;
/* Check all oplock was freed by close */
@@ -1228,14 +1243,14 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work,
return;
if (brk_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
brk_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
return;
}
brk_opinfo->open_trunc = is_trunc;
list_add(&work->interim_entry, &brk_opinfo->interim_list);
oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
}
/**
@@ -1263,6 +1278,13 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
if (!atomic_inc_not_zero(&brk_op->refcount))
continue;
+
+ atomic_inc(&brk_op->conn->r_count);
+ if (ksmbd_conn_releasing(brk_op->conn)) {
+ atomic_dec(&brk_op->conn->r_count);
+ continue;
+ }
+
rcu_read_unlock();
if (brk_op->is_lease && (brk_op->o_lease->state &
(~(SMB2_LEASE_READ_CACHING_LE |
@@ -1292,7 +1314,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
brk_op->open_trunc = is_trunc;
oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
next:
- opinfo_put(brk_op);
+ opinfo_conn_put(brk_op);
rcu_read_lock();
}
rcu_read_unlock();
@@ -1449,11 +1471,12 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
* smb2_find_context_vals() - find a particular context info in open request
* @open_req: buffer containing smb2 file open(create) request
* @tag: context name to search for
+ * @tag_len: the length of tag
*
* Return: pointer to requested context, NULL if @str context not found
* or error pointer if name length is invalid.
*/
-struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len)
{
struct create_context *cc;
unsigned int next = 0;
@@ -1492,7 +1515,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
return ERR_PTR(-EINVAL);
name = (char *)cc + name_off;
- if (memcmp(name, tag, name_len) == 0)
+ if (name_len == tag_len && !memcmp(name, tag, name_len))
return cc;
remain_len -= next;
diff --git a/fs/ksmbd/oplock.h b/fs/smb/server/oplock.h
index 09753448f779..4b0fe6da7694 100644
--- a/fs/ksmbd/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -118,7 +118,7 @@ void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp);
void create_mxac_rsp_buf(char *cc, int maximal_access);
void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id);
void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp);
-struct create_context *smb2_find_context_vals(void *open_req, const char *str);
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len);
struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
char *lease_key);
int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
diff --git a/fs/ksmbd/server.c b/fs/smb/server/server.c
index f9b2e0f19b03..f9b2e0f19b03 100644
--- a/fs/ksmbd/server.c
+++ b/fs/smb/server/server.c
diff --git a/fs/ksmbd/server.h b/fs/smb/server/server.h
index db7278181760..db7278181760 100644
--- a/fs/ksmbd/server.h
+++ b/fs/smb/server/server.h
diff --git a/fs/ksmbd/smb2misc.c b/fs/smb/server/smb2misc.c
index fbdde426dd01..0ffe663b7590 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/smb/server/smb2misc.c
@@ -416,8 +416,11 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
/*
* Allow a message that padded to 8byte boundary.
+ * Linux 4.19.217 with smb 3.0.2 are sometimes
+ * sending messages where the cls_len is exactly
+ * 8 bytes less than len.
*/
- if (clc_len < len && (len - clc_len) < 8)
+ if (clc_len < len && (len - clc_len) <= 8)
goto validate_credit;
pr_err_ratelimited(
diff --git a/fs/ksmbd/smb2ops.c b/fs/smb/server/smb2ops.c
index aed7704a0672..aed7704a0672 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
diff --git a/fs/ksmbd/smb2pdu.c b/fs/smb/server/smb2pdu.c
index cb93fd231f4e..7a81541de602 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -326,13 +326,9 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
if (hdr->Command == SMB2_NEGOTIATE)
aux_max = 1;
else
- aux_max = conn->vals->max_credits - credit_charge;
+ aux_max = conn->vals->max_credits - conn->total_credits;
credits_granted = min_t(unsigned short, credits_requested, aux_max);
- if (conn->vals->max_credits - conn->total_credits < credits_granted)
- credits_granted = conn->vals->max_credits -
- conn->total_credits;
-
conn->total_credits += credits_granted;
work->credits_granted += credits_granted;
@@ -849,13 +845,14 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
struct smb2_preauth_neg_context *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
/*
* sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt,
* which may not be present. Only check for used HashAlgorithms[1].
*/
- if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN)
+ if (ctxt_len <
+ sizeof(struct smb2_neg_context) + MIN_PREAUTH_CTXT_DATA_LEN)
return STATUS_INVALID_PARAMETER;
if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
@@ -867,15 +864,23 @@ static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
struct smb2_encryption_neg_context *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
- int i, cphs_size = cph_cnt * sizeof(__le16);
+ int cph_cnt;
+ int i, cphs_size;
+
+ if (sizeof(struct smb2_encryption_neg_context) > ctxt_len) {
+ pr_err("Invalid SMB2_ENCRYPTION_CAPABILITIES context size\n");
+ return;
+ }
conn->cipher_type = 0;
+ cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
+ cphs_size = cph_cnt * sizeof(__le16);
+
if (sizeof(struct smb2_encryption_neg_context) + cphs_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid cipher count(%d)\n", cph_cnt);
return;
}
@@ -923,15 +928,22 @@ static void decode_compress_ctxt(struct ksmbd_conn *conn,
static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
struct smb2_signing_capabilities *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
- int i, sign_alos_size = sign_algo_cnt * sizeof(__le16);
+ int sign_algo_cnt;
+ int i, sign_alos_size;
+
+ if (sizeof(struct smb2_signing_capabilities) > ctxt_len) {
+ pr_err("Invalid SMB2_SIGNING_CAPABILITIES context length\n");
+ return;
+ }
conn->signing_negotiated = false;
+ sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
+ sign_alos_size = sign_algo_cnt * sizeof(__le16);
if (sizeof(struct smb2_signing_capabilities) + sign_alos_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid signing algorithm count(%d)\n", sign_algo_cnt);
return;
}
@@ -969,18 +981,16 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
len_of_ctxts = len_of_smb - offset;
while (i++ < neg_ctxt_cnt) {
- int clen;
-
- /* check that offset is not beyond end of SMB */
- if (len_of_ctxts == 0)
- break;
+ int clen, ctxt_len;
if (len_of_ctxts < sizeof(struct smb2_neg_context))
break;
pctx = (struct smb2_neg_context *)((char *)pctx + offset);
clen = le16_to_cpu(pctx->DataLength);
- if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts)
+ ctxt_len = clen + sizeof(struct smb2_neg_context);
+
+ if (ctxt_len > len_of_ctxts)
break;
if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) {
@@ -991,7 +1001,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
status = decode_preauth_ctxt(conn,
(struct smb2_preauth_neg_context *)pctx,
- len_of_ctxts);
+ ctxt_len);
if (status != STATUS_SUCCESS)
break;
} else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
@@ -1002,7 +1012,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
decode_encrypt_ctxt(conn,
(struct smb2_encryption_neg_context *)pctx,
- len_of_ctxts);
+ ctxt_len);
} else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_COMPRESSION_CAPABILITIES context\n");
@@ -1021,9 +1031,10 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
} else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_SIGNING_CAPABILITIES context\n");
+
decode_sign_cap_ctxt(conn,
(struct smb2_signing_capabilities *)pctx,
- len_of_ctxts);
+ ctxt_len);
}
/* offsets must be 8 byte aligned */
@@ -1057,16 +1068,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
return rc;
}
- if (req->DialectCount == 0) {
- pr_err("malformed packet\n");
+ smb2_buf_len = get_rfc1002_len(work->request_buf);
+ smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
+ if (smb2_neg_size > smb2_buf_len) {
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
rc = -EINVAL;
goto err_out;
}
- smb2_buf_len = get_rfc1002_len(work->request_buf);
- smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
- if (smb2_neg_size > smb2_buf_len) {
+ if (req->DialectCount == 0) {
+ pr_err("malformed packet\n");
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
rc = -EINVAL;
goto err_out;
@@ -1356,7 +1367,7 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
struct authenticate_message *authblob;
struct ksmbd_user *user;
char *name;
- unsigned int auth_msg_len, name_off, name_len, secbuf_len;
+ unsigned int name_off, name_len, secbuf_len;
secbuf_len = le16_to_cpu(req->SecurityBufferLength);
if (secbuf_len < sizeof(struct authenticate_message)) {
@@ -1366,9 +1377,8 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
authblob = user_authblob(conn, req);
name_off = le32_to_cpu(authblob->UserName.BufferOffset);
name_len = le16_to_cpu(authblob->UserName.Length);
- auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len;
- if (auth_msg_len < (u64)name_off + name_len)
+ if (secbuf_len < (u64)name_off + name_len)
return NULL;
name = smb_strndup_from_utf16((const char *)authblob + name_off,
@@ -2464,7 +2474,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
return -ENOENT;
/* Parse SD BUFFER create contexts */
- context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER);
+ context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER, 4);
if (!context)
return -ENOENT;
else if (IS_ERR(context))
@@ -2666,7 +2676,7 @@ int smb2_open(struct ksmbd_work *work)
if (req->CreateContextsOffset) {
/* Parse non-durable handle create contexts */
- context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER);
+ context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2686,7 +2696,7 @@ int smb2_open(struct ksmbd_work *work)
}
context = smb2_find_context_vals(req,
- SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST);
+ SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2697,7 +2707,7 @@ int smb2_open(struct ksmbd_work *work)
}
context = smb2_find_context_vals(req,
- SMB2_CREATE_TIMEWARP_REQUEST);
+ SMB2_CREATE_TIMEWARP_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2709,7 +2719,7 @@ int smb2_open(struct ksmbd_work *work)
if (tcon->posix_extensions) {
context = smb2_find_context_vals(req,
- SMB2_CREATE_TAG_POSIX);
+ SMB2_CREATE_TAG_POSIX, 16);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -3107,7 +3117,7 @@ int smb2_open(struct ksmbd_work *work)
struct create_alloc_size_req *az_req;
az_req = (struct create_alloc_size_req *)smb2_find_context_vals(req,
- SMB2_CREATE_ALLOCATION_SIZE);
+ SMB2_CREATE_ALLOCATION_SIZE, 4);
if (IS_ERR(az_req)) {
rc = PTR_ERR(az_req);
goto err_out;
@@ -3134,7 +3144,7 @@ int smb2_open(struct ksmbd_work *work)
err);
}
- context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID);
+ context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out;
@@ -4359,21 +4369,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
return 0;
}
-static unsigned long long get_allocation_size(struct inode *inode,
- struct kstat *stat)
-{
- unsigned long long alloc_size = 0;
-
- if (!S_ISDIR(stat->mode)) {
- if ((inode->i_blocks << 9) <= stat->size)
- alloc_size = stat->size;
- else
- alloc_size = inode->i_blocks << 9;
- }
-
- return alloc_size;
-}
-
static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
struct ksmbd_file *fp, void *rsp_org)
{
@@ -4388,7 +4383,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
- sinfo->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+ sinfo->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
sinfo->DeletePending = delete_pending;
@@ -4453,7 +4448,7 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->Attributes = fp->f_ci->m_fattr;
file_info->Pad1 = 0;
file_info->AllocationSize =
- cpu_to_le64(get_allocation_size(inode, &stat));
+ cpu_to_le64(inode->i_blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->NumberOfLinks =
cpu_to_le32(get_nlink(&stat) - delete_pending);
@@ -4642,7 +4637,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
file_info->AllocationSize =
- cpu_to_le64(get_allocation_size(inode, &stat));
+ cpu_to_le64(inode->i_blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
@@ -5507,7 +5502,7 @@ static int smb2_create_link(struct ksmbd_work *work,
{
char *link_name = NULL, *target_name = NULL, *pathname = NULL;
struct path path;
- bool file_present = true;
+ bool file_present = false;
int rc;
if (buf_len < (u64)sizeof(struct smb2_file_link_info) +
@@ -5540,8 +5535,8 @@ static int smb2_create_link(struct ksmbd_work *work,
if (rc) {
if (rc != -ENOENT)
goto out;
- file_present = false;
- }
+ } else
+ file_present = true;
if (file_info->ReplaceIfExists) {
if (file_present) {
diff --git a/fs/ksmbd/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 2767c08a534a..2767c08a534a 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
diff --git a/fs/ksmbd/smb_common.c b/fs/smb/server/smb_common.c
index af0c2a9b8529..af0c2a9b8529 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/smb/server/smb_common.c
diff --git a/fs/ksmbd/smb_common.h b/fs/smb/server/smb_common.h
index 9130d2e3cd78..6b0d5f1fe85c 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -10,7 +10,7 @@
#include "glob.h"
#include "nterr.h"
-#include "../smbfs_common/smb2pdu.h"
+#include "../common/smb2pdu.h"
#include "smb2pdu.h"
/* ksmbd's Specific ERRNO */
diff --git a/fs/ksmbd/smbacl.c b/fs/smb/server/smbacl.c
index 6d6cfb6957a9..6d6cfb6957a9 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/smb/server/smbacl.c
diff --git a/fs/ksmbd/smbacl.h b/fs/smb/server/smbacl.h
index 49a8c292bd2e..49a8c292bd2e 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/smb/server/smbacl.h
diff --git a/fs/ksmbd/smbfsctl.h b/fs/smb/server/smbfsctl.h
index b98418aae20c..ecdf8f6e0df4 100644
--- a/fs/ksmbd/smbfsctl.h
+++ b/fs/smb/server/smbfsctl.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
/*
- * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
+ * fs/smb/server/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
*
* Copyright (c) International Business Machines Corp., 2002,2009
* Author(s): Steve French (sfrench@us.ibm.com)
diff --git a/fs/ksmbd/smbstatus.h b/fs/smb/server/smbstatus.h
index 108a8b6ed24a..8963deb42404 100644
--- a/fs/ksmbd/smbstatus.h
+++ b/fs/smb/server/smbstatus.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
/*
- * fs/cifs/smb2status.h
+ * fs/server/smb2status.h
*
* SMB2 Status code (network error) definitions
* Definitions are from MS-ERREF
diff --git a/fs/ksmbd/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 40c721f9227e..40c721f9227e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
diff --git a/fs/ksmbd/transport_ipc.h b/fs/smb/server/transport_ipc.h
index 5e5b90a0c187..5e5b90a0c187 100644
--- a/fs/ksmbd/transport_ipc.h
+++ b/fs/smb/server/transport_ipc.h
diff --git a/fs/ksmbd/transport_rdma.c b/fs/smb/server/transport_rdma.c
index c06efc020bd9..c06efc020bd9 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
diff --git a/fs/ksmbd/transport_rdma.h b/fs/smb/server/transport_rdma.h
index 77aee4e5c9dc..77aee4e5c9dc 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/smb/server/transport_rdma.h
diff --git a/fs/ksmbd/transport_tcp.c b/fs/smb/server/transport_tcp.c
index eff7a1d793f0..eff7a1d793f0 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
diff --git a/fs/ksmbd/transport_tcp.h b/fs/smb/server/transport_tcp.h
index e338bebe322f..e338bebe322f 100644
--- a/fs/ksmbd/transport_tcp.h
+++ b/fs/smb/server/transport_tcp.h
diff --git a/fs/ksmbd/unicode.c b/fs/smb/server/unicode.c
index 9ae676906ed3..9ae676906ed3 100644
--- a/fs/ksmbd/unicode.c
+++ b/fs/smb/server/unicode.c
diff --git a/fs/ksmbd/unicode.h b/fs/smb/server/unicode.h
index 076f6034a789..076f6034a789 100644
--- a/fs/ksmbd/unicode.h
+++ b/fs/smb/server/unicode.h
diff --git a/fs/ksmbd/uniupr.h b/fs/smb/server/uniupr.h
index 26583b776897..26583b776897 100644
--- a/fs/ksmbd/uniupr.h
+++ b/fs/smb/server/uniupr.h
diff --git a/fs/ksmbd/vfs.c b/fs/smb/server/vfs.c
index 778c152708e4..6f302919e9f7 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -86,12 +86,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
err = vfs_path_parent_lookup(filename, flags,
&parent_path, &last, &type,
root_share_path);
- putname(filename);
- if (err)
+ if (err) {
+ putname(filename);
return err;
+ }
if (unlikely(type != LAST_NORM)) {
path_put(&parent_path);
+ putname(filename);
return -ENOENT;
}
@@ -108,12 +110,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
path->dentry = d;
path->mnt = share_conf->vfs_path.mnt;
path_put(&parent_path);
+ putname(filename);
return 0;
err_out:
inode_unlock(parent_path.dentry->d_inode);
path_put(&parent_path);
+ putname(filename);
return -ENOENT;
}
@@ -743,6 +747,7 @@ retry:
rd.new_dir = new_path.dentry->d_inode,
rd.new_dentry = new_dentry,
rd.flags = flags,
+ rd.delegated_inode = NULL,
err = vfs_rename(&rd);
if (err)
ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
diff --git a/fs/ksmbd/vfs.h b/fs/smb/server/vfs.h
index a4ae89f3230d..a4ae89f3230d 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/smb/server/vfs.h
diff --git a/fs/ksmbd/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 2d0138e72d78..2d0138e72d78 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
diff --git a/fs/ksmbd/vfs_cache.h b/fs/smb/server/vfs_cache.h
index fcb13413fa8d..fcb13413fa8d 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
diff --git a/fs/ksmbd/xattr.h b/fs/smb/server/xattr.h
index 16499ca5c82d..16499ca5c82d 100644
--- a/fs/ksmbd/xattr.h
+++ b/fs/smb/server/xattr.h
diff --git a/fs/statfs.c b/fs/statfs.c
index 0ba34c135593..96d1c3edf289 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -130,6 +130,7 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
if (sizeof buf.f_blocks == 4) {
if ((st->f_blocks | st->f_bfree | st->f_bavail |
st->f_bsize | st->f_frsize) &
@@ -158,7 +159,6 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
@@ -171,6 +171,7 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
buf.f_type = st->f_type;
buf.f_bsize = st->f_bsize;
buf.f_blocks = st->f_blocks;
@@ -182,7 +183,6 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
diff --git a/fs/xattr.c b/fs/xattr.c
index fcf67d80d7f9..e7bbb7f57557 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -985,9 +985,16 @@ int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
return 0;
}
-/*
+/**
+ * generic_listxattr - run through a dentry's xattr list() operations
+ * @dentry: dentry to list the xattrs
+ * @buffer: result buffer
+ * @buffer_size: size of @buffer
+ *
* Combine the results of the list() operation from every xattr_handler in the
- * list.
+ * xattr_handler stack.
+ *
+ * Note that this will not include the entries for POSIX ACLs.
*/
ssize_t
generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
@@ -996,10 +1003,6 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
ssize_t remaining_size = buffer_size;
int err = 0;
- err = posix_acl_listxattr(d_inode(dentry), &buffer, &remaining_size);
- if (err)
- return err;
-
for_each_xattr_handler(handlers, handler) {
if (!handler->name || (handler->list && !handler->list(dentry)))
continue;
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1b078bbbf225..ee84835ebc66 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
ASSERT(start >= mp->m_ag_prealloc_blocks);
if (start != mp->m_ag_prealloc_blocks) {
/*
- * Modify first record to pad stripe align of log
+ * Modify first record to pad stripe align of log and
+ * bump the record count.
*/
arec->ar_blockcount = cpu_to_be32(start -
mp->m_ag_prealloc_blocks);
+ be16_add_cpu(&block->bb_numrecs, 1);
nrec = arec + 1;
/*
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
be32_to_cpu(arec->ar_startblock) +
be32_to_cpu(arec->ar_blockcount));
arec = nrec;
- be16_add_cpu(&block->bb_numrecs, 1);
}
/*
* Change record start to after the internal log
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
}
/*
- * Calculate the record block count and check for the case where
- * the log might have consumed all available space in the AG. If
- * so, reset the record count to 0 to avoid exposure of an invalid
- * record start block.
+ * Calculate the block count of this record; if it is nonzero,
+ * increment the record count.
*/
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
- if (!arec->ar_blockcount)
- block->bb_numrecs = 0;
+ if (arec->ar_blockcount)
+ be16_add_cpu(&block->bb_numrecs, 1);
}
/*
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@@ -548,7 +547,7 @@ xfs_cntroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@@ -985,7 +984,10 @@ xfs_ag_shrink_space(
if (err2 != -ENOSPC)
goto resv_err;
- __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
+ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
+ true);
+ if (err2)
+ goto resv_err;
/*
* Roll the transaction before trying to re-init the per-ag
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fdfa08cbf4db..c20fe99405d8 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -628,6 +628,25 @@ xfs_alloc_fixup_trees(
return 0;
}
+/*
+ * We do not verify the AGFL contents against AGF-based index counters here,
+ * even though we may have access to the perag that contains shadow copies. We
+ * don't know if the AGF based counters have been checked, and if they have they
+ * still may be inconsistent because they haven't yet been reset on the first
+ * allocation after the AGF has been read in.
+ *
+ * This means we can only check that all agfl entries contain valid or null
+ * values because we can't reliably determine the active range to exclude
+ * NULLAGBNO as a valid value.
+ *
+ * However, we can't even do that for v4 format filesystems because there are
+ * old versions of mkfs out there that does not initialise the AGFL to known,
+ * verifiable values. HEnce we can't tell the difference between a AGFL block
+ * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
+ *
+ * As a result, we can only fully validate AGFL block numbers when we pull them
+ * from the freelist in xfs_alloc_get_freelist().
+ */
static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
@@ -637,12 +656,6 @@ xfs_agfl_verify(
__be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
int i;
- /*
- * There is no verification of non-crc AGFLs because mkfs does not
- * initialise the AGFL to zero or NULL. Hence the only valid part of the
- * AGFL is what the AGF says is active. We can't get to the AGF, so we
- * can't verify just those entries are valid.
- */
if (!xfs_has_crc(mp))
return NULL;
@@ -2321,12 +2334,16 @@ xfs_free_agfl_block(
}
/*
- * Check the agfl fields of the agf for inconsistency or corruption. The purpose
- * is to detect an agfl header padding mismatch between current and early v5
- * kernels. This problem manifests as a 1-slot size difference between the
- * on-disk flcount and the active [first, last] range of a wrapped agfl. This
- * may also catch variants of agfl count corruption unrelated to padding. Either
- * way, we'll reset the agfl and warn the user.
+ * Check the agfl fields of the agf for inconsistency or corruption.
+ *
+ * The original purpose was to detect an agfl header padding mismatch between
+ * current and early v5 kernels. This problem manifests as a 1-slot size
+ * difference between the on-disk flcount and the active [first, last] range of
+ * a wrapped agfl.
+ *
+ * However, we need to use these same checks to catch agfl count corruptions
+ * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
+ * way, we need to reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
@@ -2342,10 +2359,6 @@ xfs_agfl_needs_reset(
int agfl_size = xfs_agfl_size(mp);
int active;
- /* no agfl header on v4 supers */
- if (!xfs_has_crc(mp))
- return false;
-
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
@@ -2418,7 +2431,7 @@ xfs_agfl_reset(
* the real allocation can proceed. Deferring the free disconnects freeing up
* the AGFL slot from freeing the block.
*/
-STATIC void
+static int
xfs_defer_agfl_block(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -2437,17 +2450,21 @@ xfs_defer_agfl_block(
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
+ return -EFSCORRUPTED;
+
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+ return 0;
}
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
-void
+int
__xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
@@ -2474,6 +2491,9 @@ __xfs_free_extent_later(
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
@@ -2497,6 +2517,7 @@ __xfs_free_extent_later(
xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ return 0;
}
#ifdef DEBUG
@@ -2657,7 +2678,9 @@ xfs_alloc_fix_freelist(
goto out_agbp_relse;
/* defer agfl frees */
- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ if (error)
+ goto out_agbp_relse;
}
targs.tp = tp;
@@ -2767,6 +2790,9 @@ xfs_alloc_get_freelist(
*/
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
+ return -EFSCORRUPTED;
+
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
@@ -2889,6 +2915,19 @@ xfs_alloc_put_freelist(
return 0;
}
+/*
+ * Verify the AGF is consistent.
+ *
+ * We do not verify the AGFL indexes in the AGF are fully consistent here
+ * because of issues with variable on-disk structure sizes. Instead, we check
+ * the agfl indexes for consistency when we initialise the perag from the AGF
+ * information after a read completes.
+ *
+ * If the index is inconsistent, then we mark the perag as needing an AGFL
+ * reset. The first AGFL update performed then resets the AGFL indexes and
+ * refills the AGFL with known good free blocks, allowing the filesystem to
+ * continue operating normally at the cost of a few leaked free space blocks.
+ */
static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
@@ -2962,7 +3001,6 @@ xfs_agf_verify(
return __this_address;
return NULL;
-
}
static void
@@ -3187,7 +3225,8 @@ xfs_alloc_vextent_check_args(
*/
static int
xfs_alloc_vextent_prepare_ag(
- struct xfs_alloc_arg *args)
+ struct xfs_alloc_arg *args,
+ uint32_t flags)
{
bool need_pag = !args->pag;
int error;
@@ -3196,7 +3235,7 @@ xfs_alloc_vextent_prepare_ag(
args->pag = xfs_perag_get(args->mp, args->agno);
args->agbp = NULL;
- error = xfs_alloc_fix_freelist(args, 0);
+ error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
if (need_pag)
@@ -3336,7 +3375,7 @@ xfs_alloc_vextent_this_ag(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_size(args);
@@ -3380,7 +3419,7 @@ restart:
for_each_perag_wrap_range(mp, start_agno, restart_agno,
mp->m_sb.sb_agcount, agno, args->pag) {
args->agno = agno;
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, flags);
if (error)
break;
if (!args->agbp) {
@@ -3546,7 +3585,7 @@ xfs_alloc_vextent_exact_bno(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_exact(args);
@@ -3587,7 +3626,7 @@ xfs_alloc_vextent_near_bno(
if (needs_perag)
args->pag = xfs_perag_grab(mp, args->agno);
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_near(args);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 5dbb25546d0b..85ac470be0da 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -230,7 +230,7 @@ xfs_buf_to_agfl_bno(
return bp->b_addr;
}
-void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
@@ -254,14 +254,14 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
-static inline void
+static inline int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo)
{
- __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ return __xfs_free_extent_later(tp, bno, len, oinfo, false);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b512de0540d5..fef35696adb7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents(
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
+
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
- xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ if (error)
+ return error;
+
ip->i_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
@@ -3494,8 +3498,10 @@ xfs_bmap_btalloc_at_eof(
if (!caller_pag)
args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
- if (!caller_pag)
+ if (!caller_pag) {
xfs_perag_put(args->pag);
+ args->pag = NULL;
+ }
if (error)
return error;
@@ -3505,7 +3511,6 @@ xfs_bmap_btalloc_at_eof(
* Exact allocation failed. Reset to try an aligned allocation
* according to the original allocation specification.
*/
- args->pag = NULL;
args->alignment = stripe_align;
args->minlen = nextminlen;
args->minalignslop = 0;
@@ -5229,10 +5234,12 @@ xfs_bmap_del_extent_real(
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
} else {
- __xfs_free_extent_later(tp, del->br_startblock,
+ error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
(bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN);
+ if (error)
+ goto done;
}
}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1b40e5f8b1ec..36564ae3084f 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -268,11 +268,14 @@ xfs_bmbt_free_block(
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
struct xfs_owner_info oinfo;
+ int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
- xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
- ip->i_nblocks--;
+ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+ if (error)
+ return error;
+ ip->i_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index a16d5de16933..34600f94c2f4 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1834,7 +1834,7 @@ retry:
* might be sparse and only free the regions that are allocated as part of the
* chunk.
*/
-STATIC void
+static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -1851,10 +1851,10 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
- return;
+ return xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, sagbno),
+ M_IGEO(mp)->ialloc_blks,
+ &XFS_RMAP_OINFO_INODES);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1871,6 +1871,8 @@ xfs_difree_inode_chunk(
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1;
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ int error;
+
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit);
/*
@@ -1896,8 +1898,11 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ error = xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, agbno),
+ contigblk, &XFS_RMAP_OINFO_INODES);
+ if (error)
+ return error;
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
@@ -1905,6 +1910,7 @@ xfs_difree_inode_chunk(
next:
nextbit++;
}
+ return 0;
}
STATIC int
@@ -2003,7 +2009,9 @@ xfs_difree_inobt(
goto error0;
}
- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ if (error)
+ goto error0;
} else {
xic->deleted = false;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f13e0809dc63..269573c82808 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -324,7 +324,6 @@ struct xfs_inode_log_format_32 {
#define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */
#define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */
-
/*
* The timestamps are dirty, but not necessarily anything else in the inode
* core. Unlike the other fields above this one must never make it to disk
@@ -333,6 +332,14 @@ struct xfs_inode_log_format_32 {
*/
#define XFS_ILOG_TIMESTAMP 0x4000
+/*
+ * The version field has been changed, but not necessarily anything else of
+ * interest. This must never make it to disk - it is used purely to ensure that
+ * the inode item ->precommit operation can update the fsync flag triggers
+ * in the inode item correctly.
+ */
+#define XFS_ILOG_IVERSION 0x8000
+
#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index c1c65774dcc2..b6e21433925c 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1151,8 +1151,10 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL);
+ if (error)
+ goto out_error;
}
(*agbno) += tmp.rc_blockcount;
@@ -1210,8 +1212,10 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL);
+ if (error)
+ goto out_error;
}
skip:
@@ -1976,7 +1980,10 @@ xfs_refcount_recover_cow_leftovers(
rr->rr_rrec.rc_blockcount);
/* Free the block. */
- xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
+ error = xfs_free_extent_later(tp, fsb,
+ rr->rr_rrec.rc_blockcount, NULL);
+ if (error)
+ goto out_trans;
error = xfs_trans_commit(tp);
if (error)
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8b5547073379..cb4796b6e693 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -40,9 +40,8 @@ xfs_trans_ijoin(
iip->ili_lock_flags = lock_flags;
ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
- /*
- * Get a log_item_desc to point at the new item.
- */
+ /* Reset the per-tx dirty context and add the item to the tx. */
+ iip->ili_dirty_flags = 0;
xfs_trans_add_item(tp, &iip->ili_item);
}
@@ -76,17 +75,10 @@ xfs_trans_ichgtime(
/*
* This is called to mark the fields indicated in fieldmask as needing to be
* logged when the transaction is committed. The inode must already be
- * associated with the given transaction.
- *
- * The values for fieldmask are defined in xfs_inode_item.h. We always log all
- * of the core inode if any of it has changed, and we always log all of the
- * inline data/extents/b-tree root if any of them has changed.
- *
- * Grab and pin the cluster buffer associated with this inode to avoid RMW
- * cycles at inode writeback time. Avoid the need to add error handling to every
- * xfs_trans_log_inode() call by shutting down on read error. This will cause
- * transactions to fail and everything to error out, just like if we return a
- * read error in a dirty transaction and cancel it.
+ * associated with the given transaction. All we do here is record where the
+ * inode was dirtied and mark the transaction and inode log item dirty;
+ * everything else is done in the ->precommit log item operation after the
+ * changes in the transaction have been completed.
*/
void
xfs_trans_log_inode(
@@ -96,7 +88,6 @@ xfs_trans_log_inode(
{
struct xfs_inode_log_item *iip = ip->i_itemp;
struct inode *inode = VFS_I(ip);
- uint iversion_flags = 0;
ASSERT(iip);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -105,18 +96,6 @@ xfs_trans_log_inode(
tp->t_flags |= XFS_TRANS_DIRTY;
/*
- * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
- * don't matter - we either will need an extra transaction in 24 hours
- * to log the timestamps, or will clear already cleared fields in the
- * worst case.
- */
- if (inode->i_state & I_DIRTY_TIME) {
- spin_lock(&inode->i_lock);
- inode->i_state &= ~I_DIRTY_TIME;
- spin_unlock(&inode->i_lock);
- }
-
- /*
* First time we log the inode in a transaction, bump the inode change
* counter if it is configured for this to occur. While we have the
* inode locked exclusively for metadata modification, we can usually
@@ -128,86 +107,10 @@ xfs_trans_log_inode(
if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
if (IS_I_VERSION(inode) &&
inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
- iversion_flags = XFS_ILOG_CORE;
- }
-
- /*
- * If we're updating the inode core or the timestamps and it's possible
- * to upgrade this inode to bigtime format, do so now.
- */
- if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
- xfs_has_bigtime(ip->i_mount) &&
- !xfs_inode_has_bigtime(ip)) {
- ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
- flags |= XFS_ILOG_CORE;
- }
-
- /*
- * Inode verifiers do not check that the extent size hint is an integer
- * multiple of the rt extent size on a directory with both rtinherit
- * and extszinherit flags set. If we're logging a directory that is
- * misconfigured in this way, clear the hint.
- */
- if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
- (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
- ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
- XFS_DIFLAG_EXTSZINHERIT);
- ip->i_extsize = 0;
- flags |= XFS_ILOG_CORE;
+ flags |= XFS_ILOG_IVERSION;
}
- /*
- * Record the specific change for fdatasync optimisation. This allows
- * fdatasync to skip log forces for inodes that are only timestamp
- * dirty.
- */
- spin_lock(&iip->ili_lock);
- iip->ili_fsync_fields |= flags;
-
- if (!iip->ili_item.li_buf) {
- struct xfs_buf *bp;
- int error;
-
- /*
- * We hold the ILOCK here, so this inode is not going to be
- * flushed while we are here. Further, because there is no
- * buffer attached to the item, we know that there is no IO in
- * progress, so nothing will clear the ili_fields while we read
- * in the buffer. Hence we can safely drop the spin lock and
- * read the buffer knowing that the state will not change from
- * here.
- */
- spin_unlock(&iip->ili_lock);
- error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
- if (error) {
- xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
- return;
- }
-
- /*
- * We need an explicit buffer reference for the log item but
- * don't want the buffer to remain attached to the transaction.
- * Hold the buffer but release the transaction reference once
- * we've attached the inode log item to the buffer log item
- * list.
- */
- xfs_buf_hold(bp);
- spin_lock(&iip->ili_lock);
- iip->ili_item.li_buf = bp;
- bp->b_flags |= _XBF_INODES;
- list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
- xfs_trans_brelse(tp, bp);
- }
-
- /*
- * Always OR in the bits from the ili_last_fields field. This is to
- * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
- * in the eventual clearing of the ili_fields bits. See the big comment
- * in xfs_iflush() for an explanation of this coordination mechanism.
- */
- iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
- spin_unlock(&iip->ili_lock);
+ iip->ili_dirty_flags |= flags;
}
int
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 87ab9f95a487..5bf4326e9783 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -42,12 +42,12 @@ xchk_setup_inode_bmap(
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
/*
- * We don't want any ephemeral data fork updates sitting around
+ * We don't want any ephemeral data/cow fork updates sitting around
* while we inspect block mappings, so wait for directio to finish
* and flush dirty data if we have delalloc reservations.
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
- sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
+ sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
@@ -769,14 +769,14 @@ xchk_are_bmaps_contiguous(
* mapping or false if there are no more mappings. Caller must ensure that
* @info.icur is zeroed before the first call.
*/
-static int
+static bool
xchk_bmap_iext_iter(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_bmbt_irec got;
struct xfs_ifork *ifp;
- xfs_filblks_t prev_len;
+ unsigned int nr = 0;
ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
@@ -790,12 +790,12 @@ xchk_bmap_iext_iter(
irec->br_startoff);
return false;
}
+ nr++;
/*
* Iterate subsequent iextent records and merge them with the one
* that we just read, if possible.
*/
- prev_len = irec->br_blockcount;
while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
if (!xchk_are_bmaps_contiguous(irec, &got))
break;
@@ -805,20 +805,21 @@ xchk_bmap_iext_iter(
got.br_startoff);
return false;
}
-
- /*
- * Notify the user of mergeable records in the data or attr
- * forks. CoW forks only exist in memory so we ignore them.
- */
- if (info->whichfork != XFS_COW_FORK &&
- prev_len + got.br_blockcount > BMBT_BLOCKCOUNT_MASK)
- xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
+ nr++;
irec->br_blockcount += got.br_blockcount;
- prev_len = got.br_blockcount;
xfs_iext_next(ifp, &info->icur);
}
+ /*
+ * If the merged mapping could be expressed with fewer bmbt records
+ * than we actually found, notify the user that this fork could be
+ * optimized. CoW forks only exist in memory so we ignore them.
+ */
+ if (nr > 1 && info->whichfork != XFS_COW_FORK &&
+ howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
+ xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
+
return true;
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9aa79665c608..7a20256be969 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks(
return 0;
}
-/* Pause background reaping of resources. */
-void
-xchk_stop_reaping(
- struct xfs_scrub *sc)
-{
- sc->flags |= XCHK_REAPING_DISABLED;
- xfs_blockgc_stop(sc->mp);
- xfs_inodegc_stop(sc->mp);
-}
-
-/* Restart background reaping of resources. */
-void
-xchk_start_reaping(
- struct xfs_scrub *sc)
-{
- /*
- * Readonly filesystems do not perform inactivation or speculative
- * preallocation, so there's no need to restart the workers.
- */
- if (!xfs_is_readonly(sc->mp)) {
- xfs_inodegc_start(sc->mp);
- xfs_blockgc_start(sc->mp);
- }
- sc->flags &= ~XCHK_REAPING_DISABLED;
-}
-
/*
* Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
* operation. Callers must not hold any locks that intersect with the CPU
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 18b5f2b62f13..791235cd9b00 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
}
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
-void xchk_stop_reaping(struct xfs_scrub *sc);
-void xchk_start_reaping(struct xfs_scrub *sc);
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index faa315be7978..e382a35e98d8 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -150,13 +150,6 @@ xchk_setup_fscounters(
if (error)
return error;
- /*
- * Pause background reclaim while we're scrubbing to reduce the
- * likelihood of background perturbations to the counters throwing off
- * our calculations.
- */
- xchk_stop_reaping(sc);
-
return xchk_trans_alloc(sc, 0);
}
@@ -454,6 +447,12 @@ xchk_fscounters(
xchk_set_corrupt(sc);
/*
+ * XXX: We can't quiesce percpu counter updates, so exit early.
+ * This can be re-enabled when we gain exclusive freeze functionality.
+ */
+ return 0;
+
+ /*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.
*/
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 02819bedc5b1..3d98f604765e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -186,8 +186,6 @@ xchk_teardown(
}
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
mnt_drop_write_file(sc->file);
- if (sc->flags & XCHK_REAPING_DISABLED)
- xchk_start_reaping(sc);
if (sc->buf) {
if (sc->buf_cleanup)
sc->buf_cleanup(sc->buf);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e71903474cd7..e113f2f5c254 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -105,11 +105,10 @@ struct xfs_scrub {
};
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
-#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
-#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
-#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
-#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
-#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
+#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
+#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
+#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
+#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
/*
* The XCHK_FSGATES* flags reflect functionality in the main filesystem that
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 68efd6fda61c..b3894daeb86a 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
#define XFS_SCRUB_STATE_STRINGS \
{ XCHK_TRY_HARDER, "try_harder" }, \
- { XCHK_REAPING_DISABLED, "reaping_disabled" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f032d3a4b727..fbb675563208 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -558,7 +558,9 @@ xfs_getbmap(
if (!xfs_iext_next_extent(ifp, &icur, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
- out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
+ if (bmv->bmv_entries > 0)
+ out[bmv->bmv_entries - 1].bmv_oflags |=
+ BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index df7322ed73fa..023d4e0385dd 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -452,10 +452,18 @@ xfs_buf_item_format(
* This is called to pin the buffer associated with the buf log item in memory
* so it cannot be written out.
*
- * We also always take a reference to the buffer log item here so that the bli
- * is held while the item is pinned in memory. This means that we can
- * unconditionally drop the reference count a transaction holds when the
- * transaction is completed.
+ * We take a reference to the buffer log item here so that the BLI life cycle
+ * extends at least until the buffer is unpinned via xfs_buf_item_unpin() and
+ * inserted into the AIL.
+ *
+ * We also need to take a reference to the buffer itself as the BLI unpin
+ * processing requires accessing the buffer after the BLI has dropped the final
+ * BLI reference. See xfs_buf_item_unpin() for an explanation.
+ * If unpins race to drop the final BLI reference and only the
+ * BLI owns a reference to the buffer, then the loser of the race can have the
+ * buffer fgreed from under it (e.g. on shutdown). Taking a buffer reference per
+ * pin count ensures the life cycle of the buffer extends for as
+ * long as we hold the buffer pin reference in xfs_buf_item_unpin().
*/
STATIC void
xfs_buf_item_pin(
@@ -470,13 +478,30 @@ xfs_buf_item_pin(
trace_xfs_buf_item_pin(bip);
+ xfs_buf_hold(bip->bli_buf);
atomic_inc(&bip->bli_refcount);
atomic_inc(&bip->bli_buf->b_pin_count);
}
/*
- * This is called to unpin the buffer associated with the buf log item which
- * was previously pinned with a call to xfs_buf_item_pin().
+ * This is called to unpin the buffer associated with the buf log item which was
+ * previously pinned with a call to xfs_buf_item_pin(). We enter this function
+ * with a buffer pin count, a buffer reference and a BLI reference.
+ *
+ * We must drop the BLI reference before we unpin the buffer because the AIL
+ * doesn't acquire a BLI reference whenever it accesses it. Therefore if the
+ * refcount drops to zero, the bli could still be AIL resident and the buffer
+ * submitted for I/O at any point before we return. This can result in IO
+ * completion freeing the buffer while we are still trying to access it here.
+ * This race condition can also occur in shutdown situations where we abort and
+ * unpin buffers from contexts other that journal IO completion.
+ *
+ * Hence we have to hold a buffer reference per pin count to ensure that the
+ * buffer cannot be freed until we have finished processing the unpin operation.
+ * The reference is taken in xfs_buf_item_pin(), and we must hold it until we
+ * are done processing the buffer state. In the case of an abort (remove =
+ * true) then we re-use the current pin reference as the IO reference we hand
+ * off to IO failure handling.
*/
STATIC void
xfs_buf_item_unpin(
@@ -493,24 +518,18 @@ xfs_buf_item_unpin(
trace_xfs_buf_item_unpin(bip);
- /*
- * Drop the bli ref associated with the pin and grab the hold required
- * for the I/O simulation failure in the abort case. We have to do this
- * before the pin count drops because the AIL doesn't acquire a bli
- * reference. Therefore if the refcount drops to zero, the bli could
- * still be AIL resident and the buffer submitted for I/O (and freed on
- * completion) at any point before we return. This can be removed once
- * the AIL properly holds a reference on the bli.
- */
freed = atomic_dec_and_test(&bip->bli_refcount);
- if (freed && !stale && remove)
- xfs_buf_hold(bp);
if (atomic_dec_and_test(&bp->b_pin_count))
wake_up_all(&bp->b_waiters);
- /* nothing to do but drop the pin count if the bli is active */
- if (!freed)
+ /*
+ * Nothing to do but drop the buffer pin reference if the BLI is
+ * still active.
+ */
+ if (!freed) {
+ xfs_buf_rele(bp);
return;
+ }
if (stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -523,6 +542,15 @@ xfs_buf_item_unpin(
trace_xfs_buf_item_unpin_stale(bip);
/*
+ * The buffer has been locked and referenced since it was marked
+ * stale so we own both lock and reference exclusively here. We
+ * do not need the pin reference any more, so drop it now so
+ * that we only have one reference to drop once item completion
+ * processing is complete.
+ */
+ xfs_buf_rele(bp);
+
+ /*
* If we get called here because of an IO error, we may or may
* not have the item on the AIL. xfs_trans_ail_delete() will
* take care of that situation. xfs_trans_ail_delete() drops
@@ -538,16 +566,30 @@ xfs_buf_item_unpin(
ASSERT(bp->b_log_item == NULL);
}
xfs_buf_relse(bp);
- } else if (remove) {
+ return;
+ }
+
+ if (remove) {
/*
- * The buffer must be locked and held by the caller to simulate
- * an async I/O failure. We acquired the hold for this case
- * before the buffer was unpinned.
+ * We need to simulate an async IO failures here to ensure that
+ * the correct error completion is run on this buffer. This
+ * requires a reference to the buffer and for the buffer to be
+ * locked. We can safely pass ownership of the pin reference to
+ * the IO to ensure that nothing can free the buffer while we
+ * wait for the lock and then run the IO failure completion.
*/
xfs_buf_lock(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioend_fail(bp);
+ return;
}
+
+ /*
+ * BLI has no more active references - it will be moved to the AIL to
+ * manage the remaining BLI/buffer life cycle. There is nothing left for
+ * us to do here so drop the pin reference to the buffer.
+ */
+ xfs_buf_rele(bp);
}
STATIC uint
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 22c13933c8f8..2fc98d313708 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -78,7 +78,6 @@ restart:
*longest = 0;
err = xfs_bmap_longest_free_extent(pag, NULL, longest);
if (err) {
- xfs_perag_rele(pag);
if (err != -EAGAIN)
break;
/* Couldn't lock the AGF, skip this AG. */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 351849fc18ff..453890942d9f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -435,18 +435,44 @@ xfs_iget_check_free_state(
}
/* Make all pending inactivation work start immediately. */
-static void
+static bool
xfs_inodegc_queue_all(
struct xfs_mount *mp)
{
struct xfs_inodegc *gc;
int cpu;
+ bool ret = false;
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
- if (!llist_empty(&gc->list))
+ if (!llist_empty(&gc->list)) {
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+/* Wait for all queued work and collect errors */
+static int
+xfs_inodegc_wait_all(
+ struct xfs_mount *mp)
+{
+ int cpu;
+ int error = 0;
+
+ flush_workqueue(mp->m_inodegc_wq);
+ for_each_online_cpu(cpu) {
+ struct xfs_inodegc *gc;
+
+ gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ if (gc->error && !error)
+ error = gc->error;
+ gc->error = 0;
}
+
+ return error;
}
/*
@@ -1486,15 +1512,14 @@ xfs_blockgc_free_space(
if (error)
return error;
- xfs_inodegc_flush(mp);
- return 0;
+ return xfs_inodegc_flush(mp);
}
/*
* Reclaim all the free space that we can by scheduling the background blockgc
* and inodegc workers immediately and waiting for them all to clear.
*/
-void
+int
xfs_blockgc_flush_all(
struct xfs_mount *mp)
{
@@ -1515,7 +1540,7 @@ xfs_blockgc_flush_all(
for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
flush_delayed_work(&pag->pag_blockgc_work);
- xfs_inodegc_flush(mp);
+ return xfs_inodegc_flush(mp);
}
/*
@@ -1837,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
* This is the last chance to make changes to an otherwise unreferenced file
* before incore reclamation happens.
*/
-static void
+static int
xfs_inodegc_inactivate(
struct xfs_inode *ip)
{
+ int error;
+
trace_xfs_inode_inactivating(ip);
- xfs_inactive(ip);
+ error = xfs_inactive(ip);
xfs_inodegc_set_reclaimable(ip);
+ return error;
+
}
void
@@ -1856,6 +1885,8 @@ xfs_inodegc_worker(
struct xfs_inode *ip, *n;
unsigned int nofs_flag;
+ ASSERT(gc->cpu == smp_processor_id());
+
WRITE_ONCE(gc->items, 0);
if (!node)
@@ -1873,8 +1904,12 @@ xfs_inodegc_worker(
WRITE_ONCE(gc->shrinker_hits, 0);
llist_for_each_entry_safe(ip, n, node, i_gclist) {
+ int error;
+
xfs_iflags_set(ip, XFS_INACTIVATING);
- xfs_inodegc_inactivate(ip);
+ error = xfs_inodegc_inactivate(ip);
+ if (error && !gc->error)
+ gc->error = error;
}
memalloc_nofs_restore(nofs_flag);
@@ -1898,35 +1933,52 @@ xfs_inodegc_push(
* Force all currently queued inode inactivation work to run immediately and
* wait for the work to finish.
*/
-void
+int
xfs_inodegc_flush(
struct xfs_mount *mp)
{
xfs_inodegc_push(mp);
trace_xfs_inodegc_flush(mp, __return_address);
- flush_workqueue(mp->m_inodegc_wq);
+ return xfs_inodegc_wait_all(mp);
}
/*
* Flush all the pending work and then disable the inode inactivation background
- * workers and wait for them to stop.
+ * workers and wait for them to stop. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
+ bool rerun;
+
if (!xfs_clear_inodegc_enabled(mp))
return;
+ /*
+ * Drain all pending inodegc work, including inodes that could be
+ * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
+ * threads that sample the inodegc state just prior to us clearing it.
+ * The inodegc flag state prevents new threads from queuing more
+ * inodes, so we queue pending work items and flush the workqueue until
+ * all inodegc lists are empty. IOWs, we cannot use drain_workqueue
+ * here because it does not allow other unserialized mechanisms to
+ * reschedule inodegc work while this draining is in progress.
+ */
xfs_inodegc_queue_all(mp);
- drain_workqueue(mp->m_inodegc_wq);
+ do {
+ flush_workqueue(mp->m_inodegc_wq);
+ rerun = xfs_inodegc_queue_all(mp);
+ } while (rerun);
trace_xfs_inodegc_stop(mp, __return_address);
}
/*
* Enable the inode inactivation background workers and schedule deferred inode
- * inactivation work if there is any.
+ * inactivation work if there is any. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_start(
@@ -2069,7 +2121,8 @@ xfs_inodegc_queue(
queue_delay = 0;
trace_xfs_inodegc_queue(mp, __return_address);
- mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ queue_delay);
put_cpu_ptr(gc);
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
@@ -2113,7 +2166,8 @@ xfs_inodegc_cpu_dead(
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
- mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ 0);
}
put_cpu_ptr(gc);
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 87910191a9dd..1dcdcb23796e 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
unsigned int iwalk_flags);
int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
-void xfs_blockgc_flush_all(struct xfs_mount *mp);
+int xfs_blockgc_flush_all(struct xfs_mount *mp);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
@@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
void xfs_inodegc_push(struct xfs_mount *mp);
-void xfs_inodegc_flush(struct xfs_mount *mp);
+int xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5808abab786c..9e62cc500140 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
*/
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
- /*
- * Just ignore errors at this point. There is nothing we can do except
- * to try to keep going. Make sure it's not a silent error.
- */
- error = xfs_trans_commit(tp);
- if (error)
- xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
- __func__, error);
-
- return 0;
+ return xfs_trans_commit(tp);
}
/*
@@ -1693,12 +1684,12 @@ xfs_inode_needs_inactive(
* now be truncated. Also, we clear all of the read-ahead state
* kept for the inode here since the file is now closed.
*/
-void
+int
xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
- int error;
+ int error = 0;
int truncate = 0;
/*
@@ -1736,7 +1727,7 @@ xfs_inactive(
* reference to the inode at this point anyways.
*/
if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(ip);
+ error = xfs_free_eofblocks(ip);
goto out;
}
@@ -1773,7 +1764,7 @@ xfs_inactive(
/*
* Free the inode.
*/
- xfs_inactive_ifree(ip);
+ error = xfs_inactive_ifree(ip);
out:
/*
@@ -1781,6 +1772,7 @@ out:
* the attached dquots.
*/
xfs_qm_dqdetach(ip);
+ return error;
}
/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 69d21e42c10a..7547caf2f2ab 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -470,7 +470,7 @@ enum layout_break_reason {
(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
int xfs_release(struct xfs_inode *ip);
-void xfs_inactive(struct xfs_inode *ip);
+int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct mnt_idmap *idmap,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index ca2941ab6cbc..91c847a84e10 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -29,6 +29,153 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_inode_log_item, ili_item);
}
+static uint64_t
+xfs_inode_item_sort(
+ struct xfs_log_item *lip)
+{
+ return INODE_ITEM(lip)->ili_inode->i_ino;
+}
+
+/*
+ * Prior to finally logging the inode, we have to ensure that all the
+ * per-modification inode state changes are applied. This includes VFS inode
+ * state updates, format conversions, verifier state synchronisation and
+ * ensuring the inode buffer remains in memory whilst the inode is dirty.
+ *
+ * We have to be careful when we grab the inode cluster buffer due to lock
+ * ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
+ * require AGI -> inode cluster buffer lock order. The inode cluster buffer is
+ * not locked until ->precommit, so it happens after everything else has been
+ * modified.
+ *
+ * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
+ * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
+ * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
+ * it can be called on a inode (e.g. via bumplink/droplink) before we take the
+ * AGF lock modifying directory blocks.
+ *
+ * Rather than force a complete rework of all the transactions to call
+ * xfs_trans_log_inode() once and once only at the end of every transaction, we
+ * move the pinning of the inode cluster buffer to a ->precommit operation. This
+ * matches how the xfs_iunlink_item locks the inode cluster buffer, and it
+ * ensures that the inode cluster buffer locking is always done last in a
+ * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode
+ * cluster buffer.
+ *
+ * If we return the inode number as the precommit sort key then we'll also
+ * guarantee that the order all inode cluster buffer locking is the same all the
+ * inodes and unlink items in the transaction.
+ */
+static int
+xfs_inode_item_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+ struct inode *inode = VFS_I(ip);
+ unsigned int flags = iip->ili_dirty_flags;
+
+ /*
+ * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
+ * don't matter - we either will need an extra transaction in 24 hours
+ * to log the timestamps, or will clear already cleared fields in the
+ * worst case.
+ */
+ if (inode->i_state & I_DIRTY_TIME) {
+ spin_lock(&inode->i_lock);
+ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+ }
+
+ /*
+ * If we're updating the inode core or the timestamps and it's possible
+ * to upgrade this inode to bigtime format, do so now.
+ */
+ if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
+ xfs_has_bigtime(ip->i_mount) &&
+ !xfs_inode_has_bigtime(ip)) {
+ ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. If we're logging a directory that is
+ * misconfigured in this way, clear the hint.
+ */
+ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+ (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
+ * Record the specific change for fdatasync optimisation. This allows
+ * fdatasync to skip log forces for inodes that are only timestamp
+ * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
+ * to XFS_ILOG_CORE so that the actual on-disk dirty tracking
+ * (ili_fields) correctly tracks that the version has changed.
+ */
+ spin_lock(&iip->ili_lock);
+ iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
+ if (flags & XFS_ILOG_IVERSION)
+ flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
+
+ if (!iip->ili_item.li_buf) {
+ struct xfs_buf *bp;
+ int error;
+
+ /*
+ * We hold the ILOCK here, so this inode is not going to be
+ * flushed while we are here. Further, because there is no
+ * buffer attached to the item, we know that there is no IO in
+ * progress, so nothing will clear the ili_fields while we read
+ * in the buffer. Hence we can safely drop the spin lock and
+ * read the buffer knowing that the state will not change from
+ * here.
+ */
+ spin_unlock(&iip->ili_lock);
+ error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
+ if (error)
+ return error;
+
+ /*
+ * We need an explicit buffer reference for the log item but
+ * don't want the buffer to remain attached to the transaction.
+ * Hold the buffer but release the transaction reference once
+ * we've attached the inode log item to the buffer log item
+ * list.
+ */
+ xfs_buf_hold(bp);
+ spin_lock(&iip->ili_lock);
+ iip->ili_item.li_buf = bp;
+ bp->b_flags |= _XBF_INODES;
+ list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
+ xfs_trans_brelse(tp, bp);
+ }
+
+ /*
+ * Always OR in the bits from the ili_last_fields field. This is to
+ * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
+ * in the eventual clearing of the ili_fields bits. See the big comment
+ * in xfs_iflush() for an explanation of this coordination mechanism.
+ */
+ iip->ili_fields |= (flags | iip->ili_last_fields);
+ spin_unlock(&iip->ili_lock);
+
+ /*
+ * We are done with the log item transaction dirty state, so clear it so
+ * that it doesn't pollute future transactions.
+ */
+ iip->ili_dirty_flags = 0;
+ return 0;
+}
+
/*
* The logged size of an inode fork is always the current size of the inode
* fork. This means that when an inode fork is relogged, the size of the logged
@@ -662,6 +809,8 @@ xfs_inode_item_committing(
}
static const struct xfs_item_ops xfs_inode_item_ops = {
+ .iop_sort = xfs_inode_item_sort,
+ .iop_precommit = xfs_inode_item_precommit,
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bbd836a44ff0..377e06007804 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -17,6 +17,7 @@ struct xfs_inode_log_item {
struct xfs_log_item ili_item; /* common portion */
struct xfs_inode *ili_inode; /* inode ptr */
unsigned short ili_lock_flags; /* inode lock flags */
+ unsigned int ili_dirty_flags; /* dirty in current tx */
/*
* The ili_lock protects the interactions between the dirty state and
* the flush state of the inode log item. This allows us to do atomic
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 285885c308bd..18c8f168b153 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin(
if (eof)
imap.br_startoff = end_fsb; /* fake hole until the end */
- /* We never need to allocate blocks for zeroing a hole. */
- if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
+ /* We never need to allocate blocks for zeroing or unsharing a hole. */
+ if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
+ imap.br_startoff > offset_fsb) {
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
goto out_unlock;
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 322eb2ee6c55..82c81d20459d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
* just to flush the inodegc queue and wait for it to
* complete.
*/
- xfs_inodegc_flush(mp);
+ error = xfs_inodegc_flush(mp);
+ if (error)
+ break;
}
prev_agino = agino;
@@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
}
if (prev_ip) {
+ int error2;
+
ip->i_prev_unlinked = prev_agino;
xfs_irele(prev_ip);
+
+ error2 = xfs_inodegc_flush(mp);
+ if (error2 && !error)
+ return error2;
}
- xfs_inodegc_flush(mp);
return error;
}
@@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
* bucket and remaining inodes on it unreferenced and
* unfreeable.
*/
- xfs_inodegc_flush(pag->pag_mount);
xlog_recover_clear_agi_bucket(pag, bucket);
}
}
@@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
for_each_perag(log->l_mp, agno, pag)
xlog_recover_iunlink_ag(pag);
-
- /*
- * Flush the pending unlinked inodes to ensure that the inactivations
- * are fully completed on disk and the incore inodes can be reclaimed
- * before we signal that recovery is complete.
- */
- xfs_inodegc_flush(log->l_mp);
}
STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3269c0626f0..6c09f89534d3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -62,10 +62,14 @@ struct xfs_error_cfg {
struct xfs_inodegc {
struct llist_head list;
struct delayed_work work;
+ int error;
/* approximate count of inodes in the list */
unsigned int items;
unsigned int shrinker_hits;
+#if defined(DEBUG) || defined(XFS_WARN)
+ unsigned int cpu;
+#endif
};
/*
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index f5dc46ce9803..abcc559f3c64 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -616,8 +616,10 @@ xfs_reflink_cancel_cow_blocks(
xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
del.br_blockcount);
- xfs_free_extent_later(*tpp, del.br_startblock,
+ error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL);
+ if (error)
+ break;
/* Roll the transaction */
error = xfs_defer_finish(tpp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 4d2e87462ac4..4120bd1cba90 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1095,8 +1095,12 @@ xfs_inodegc_init_percpu(
for_each_possible_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
+#if defined(DEBUG) || defined(XFS_WARN)
+ gc->cpu = cpu;
+#endif
init_llist_head(&gc->list);
gc->items = 0;
+ gc->error = 0;
INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
}
return 0;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8afc0c080861..8c0bfc9a33b1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,9 @@ retry:
* Do not perform a synchronous scan because callers can hold
* other locks.
*/
- xfs_blockgc_flush_all(mp);
+ error = xfs_blockgc_flush_all(mp);
+ if (error)
+ return error;
want_retry = false;
goto retry;
}
@@ -970,6 +972,11 @@ __xfs_trans_commit(
error = xfs_defer_finish_noroll(&tp);
if (error)
goto out_unreserve;
+
+ /* Run precommits from final tx in defer chain. */
+ error = xfs_trans_run_precommits(tp);
+ if (error)
+ goto out_unreserve;
}
/*