aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/affs/affs.h22
-rw-r--r--fs/affs/amigaffs.c42
-rw-r--r--fs/affs/inode.c9
-rw-r--r--fs/affs/namei.c95
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/afs/dir.c14
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/root.c17
-rw-r--r--fs/block_dev.c13
-rw-r--r--fs/btrfs/backref.c7
-rw-r--r--fs/btrfs/btrfs_inode.h44
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/ctree.c128
-rw-r--r--fs/btrfs/ctree.h86
-rw-r--r--fs/btrfs/delayed-inode.c57
-rw-r--r--fs/btrfs/delayed-inode.h16
-rw-r--r--fs/btrfs/delayed-ref.c31
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dir-item.c5
-rw-r--r--fs/btrfs/disk-io.c29
-rw-r--r--fs/btrfs/disk-io.h6
-rw-r--r--fs/btrfs/export.c13
-rw-r--r--fs/btrfs/extent-tree.c282
-rw-r--r--fs/btrfs/extent_io.c213
-rw-r--r--fs/btrfs/extent_io.h5
-rw-r--r--fs/btrfs/file-item.c8
-rw-r--r--fs/btrfs/file.c19
-rw-r--r--fs/btrfs/free-space-cache.c65
-rw-r--r--fs/btrfs/free-space-cache.h7
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c588
-rw-r--r--fs/btrfs/ioctl.c109
-rw-r--r--fs/btrfs/ordered-data.c47
-rw-r--r--fs/btrfs/ordered-data.h4
-rw-r--r--fs/btrfs/props.c4
-rw-r--r--fs/btrfs/qgroup.c163
-rw-r--r--fs/btrfs/qgroup.h36
-rw-r--r--fs/btrfs/raid56.c2
-rw-r--r--fs/btrfs/relocation.c25
-rw-r--r--fs/btrfs/root-tree.c6
-rw-r--r--fs/btrfs/scrub.c12
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/transaction.c58
-rw-r--r--fs/btrfs/tree-log.c299
-rw-r--r--fs/btrfs/tree-log.h14
-rw-r--r--fs/btrfs/ulist.c10
-rw-r--r--fs/btrfs/ulist.h8
-rw-r--r--fs/btrfs/volumes.c18
-rw-r--r--fs/btrfs/xattr.c16
-rw-r--r--fs/buffer.c12
-rw-r--r--fs/ceph/addr.c29
-rw-r--r--fs/ceph/cache.c2
-rw-r--r--fs/ceph/caps.c40
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c32
-rw-r--r--fs/ceph/export.c3
-rw-r--r--fs/ceph/file.c106
-rw-r--r--fs/ceph/inode.c172
-rw-r--r--fs/ceph/ioctl.c4
-rw-r--r--fs/ceph/mds_client.c175
-rw-r--r--fs/ceph/mds_client.h15
-rw-r--r--fs/ceph/super.c9
-rw-r--r--fs/ceph/super.h14
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/dax.c60
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/kthread.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/ext2/file.c19
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/extents_status.c2
-rw-r--r--fs/ext4/file.c40
-rw-r--r--fs/ext4/inode.c15
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/f2fs/file.c7
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/gfs2/file.c8
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/iomap.c15
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/kernfs/dir.c2
-rw-r--r--fs/kernfs/file.c75
-rw-r--r--fs/kernfs/kernfs-internal.h2
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/ncpfs/mmap.c7
-rw-r--r--fs/ncpfs/sock.c4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayout.c4
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c4
-rw-r--r--fs/nfs/objlayout/objlayout.c2
-rw-r--r--fs/nfsd/blocklayout.c6
-rw-r--r--fs/nfsd/export.c1
-rw-r--r--fs/nfsd/nfs2acl.c1
-rw-r--r--fs/nfsd/nfs3acl.c1
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4proc.c88
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/nfs4xdr.c29
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c70
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c16
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nfsd/vfs.c104
-rw-r--r--fs/nfsd/vfs.h6
-rw-r--r--fs/nilfs2/alloc.c2
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/file.c3
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/mmap.c15
-rw-r--r--fs/orangefs/devorangefs-req.c5
-rw-r--r--fs/orangefs/inode.c6
-rw-r--r--fs/orangefs/orangefs-bufmap.c5
-rw-r--r--fs/orangefs/orangefs-debugfs.c15
-rw-r--r--fs/orangefs/orangefs-dev-proto.h3
-rw-r--r--fs/orangefs/orangefs-kernel.h1
-rw-r--r--fs/orangefs/orangefs-mod.c12
-rw-r--r--fs/orangefs/orangefs-sysfs.c32
-rw-r--r--fs/orangefs/orangefs-utils.c4
-rw-r--r--fs/orangefs/upcall.h1
-rw-r--r--fs/proc/base.c160
-rw-r--r--fs/proc/generic.c11
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/kcore.c5
-rw-r--r--fs/proc/root.c3
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/proc/vmcore.c8
-rw-r--r--fs/pstore/platform.c22
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/squashfs/lz4_wrapper.c12
-rw-r--r--fs/stat.c2
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/userfaultfd.c117
-rw-r--r--fs/xfs/xfs_aops.c16
-rw-r--r--fs/xfs/xfs_file.c42
-rw-r--r--fs/xfs/xfs_trace.h2
157 files changed, 2399 insertions, 2147 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6a0f3fa85ef7..3de3b4a89d89 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -534,11 +534,11 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
}
static int
-v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct v9fs_inode *v9inode;
struct page *page = vmf->page;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 2f088773f1c0..2f8bab390d13 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -138,9 +138,9 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh);
extern int affs_remove_header(struct dentry *dentry);
extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
-extern void secs_to_datestamp(time64_t secs, struct affs_date *ds);
-extern umode_t prot_to_mode(u32 prot);
-extern void mode_to_prot(struct inode *inode);
+extern void affs_secs_to_datestamp(time64_t secs, struct affs_date *ds);
+extern umode_t affs_prot_to_mode(u32 prot);
+extern void affs_mode_to_prot(struct inode *inode);
__printf(3, 4)
extern void affs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
@@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb);
/* namei.c */
+extern const struct export_operations affs_export_ops;
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
@@ -178,7 +179,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* inode.c */
-extern unsigned long affs_parent_ino(struct inode *dir);
extern struct inode *affs_new_inode(struct inode *dir);
extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
extern void affs_evict_inode(struct inode *inode);
@@ -213,6 +213,12 @@ extern const struct address_space_operations affs_aops_ofs;
extern const struct dentry_operations affs_dentry_operations;
extern const struct dentry_operations affs_intl_dentry_operations;
+static inline bool affs_validblock(struct super_block *sb, int block)
+{
+ return(block >= AFFS_SB(sb)->s_reserved &&
+ block < AFFS_SB(sb)->s_partition_size);
+}
+
static inline void
affs_set_blocksize(struct super_block *sb, int size)
{
@@ -222,7 +228,7 @@ static inline struct buffer_head *
affs_bread(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_bread(sb, block);
return NULL;
}
@@ -230,7 +236,7 @@ static inline struct buffer_head *
affs_getblk(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_getblk(sb, block);
return NULL;
}
@@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
memset(bh->b_data, 0 , sb->s_blocksize);
@@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
set_buffer_uptodate(bh);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 0ec65c133b93..b573c3b9a328 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
}
void
-secs_to_datestamp(time64_t secs, struct affs_date *ds)
+affs_secs_to_datestamp(time64_t secs, struct affs_date *ds)
{
u32 days;
u32 minute;
@@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds)
}
umode_t
-prot_to_mode(u32 prot)
+affs_prot_to_mode(u32 prot)
{
umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!(prot & FIBF_NOREAD))
- mode |= S_IRUSR;
+ mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
- mode |= S_IXUSR;
+ mode |= 0100;
if (prot & FIBF_GRP_WRITE)
- mode |= S_IWGRP;
+ mode |= 0020;
if (prot & FIBF_GRP_READ)
- mode |= S_IRGRP;
+ mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
- mode |= S_IXGRP;
+ mode |= 0010;
if (prot & FIBF_OTR_WRITE)
- mode |= S_IWOTH;
+ mode |= 0002;
if (prot & FIBF_OTR_READ)
- mode |= S_IROTH;
+ mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
- mode |= S_IXOTH;
+ mode |= 0001;
return mode;
}
void
-mode_to_prot(struct inode *inode)
+affs_mode_to_prot(struct inode *inode)
{
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
- if (!(mode & S_IXUSR))
+ if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
- if (!(mode & S_IRUSR))
+ if (!(mode & 0400))
prot |= FIBF_NOREAD;
- if (!(mode & S_IWUSR))
+ if (!(mode & 0200))
prot |= FIBF_NOWRITE;
- if (mode & S_IXGRP)
+ if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
- if (mode & S_IRGRP)
+ if (mode & 0040)
prot |= FIBF_GRP_READ;
- if (mode & S_IWGRP)
+ if (mode & 0020)
prot |= FIBF_GRP_WRITE;
- if (mode & S_IXOTH)
+ if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
- if (mode & S_IROTH)
+ if (mode & 0004)
prot |= FIBF_OTR_READ;
- if (mode & S_IWOTH)
+ if (mode & 0002)
prot |= FIBF_OTR_WRITE;
AFFS_I(inode)->i_protect = prot;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index fe4e1290dbb5..a5e6097eb5a9 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
- inode->i_mode = prot_to_mode(prot);
+ inode->i_mode = affs_prot_to_mode(prot);
id = be16_to_cpu(tail->uid);
if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
@@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
tail = AFFS_TAIL(sb, bh);
if (tail->stype == cpu_to_be32(ST_ROOT)) {
- secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec,
+ &AFFS_ROOT_TAIL(sb, bh)->root_change);
} else {
tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect);
tail->size = cpu_to_be32(inode->i_size);
- secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change);
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
mark_inode_dirty(inode);
if (attr->ia_valid & ATTR_MODE)
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
out:
return error;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 29186d29a3b6..96dd1d09a273 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -9,29 +9,10 @@
*/
#include "affs.h"
+#include <linux/exportfs.h>
typedef int (*toupper_t)(int);
-static int affs_toupper(int ch);
-static int affs_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-
-const struct dentry_operations affs_dentry_operations = {
- .d_hash = affs_hash_dentry,
- .d_compare = affs_compare_dentry,
-};
-
-const struct dentry_operations affs_intl_dentry_operations = {
- .d_hash = affs_intl_hash_dentry,
- .d_compare = affs_intl_compare_dentry,
-};
-
-
/* Simple toupper() for DOS\1 */
static int
@@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
return -ENOSPC;
inode->i_mode = mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
mark_inode_dirty(inode);
inode->i_op = &affs_file_inode_operations;
@@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return -ENOSPC;
inode->i_mode = S_IFDIR | mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
inode->i_op = &affs_dir_inode_operations;
inode->i_fop = &affs_dir_operations;
@@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
error = -EIO;
bh = affs_bread(sb, inode->i_ino);
@@ -465,3 +446,71 @@ done:
affs_brelse(bh);
return retval;
}
+
+static struct dentry *affs_get_parent(struct dentry *child)
+{
+ struct inode *parent;
+ struct buffer_head *bh;
+
+ bh = affs_bread(child->d_sb, d_inode(child)->i_ino);
+ if (!bh)
+ return ERR_PTR(-EIO);
+
+ parent = affs_iget(child->d_sb,
+ be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent));
+ brelse(bh);
+ if (IS_ERR(parent))
+ return ERR_CAST(parent);
+
+ return d_obtain_alias(parent);
+}
+
+static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino,
+ u32 generation)
+{
+ struct inode *inode;
+
+ if (!affs_validblock(sb, ino))
+ return ERR_PTR(-ESTALE);
+
+ inode = affs_iget(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ if (generation && inode->i_generation != generation) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ return inode;
+}
+
+static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+const struct export_operations affs_export_ops = {
+ .fh_to_dentry = affs_fh_to_dentry,
+ .fh_to_parent = affs_fh_to_parent,
+ .get_parent = affs_get_parent,
+};
+
+const struct dentry_operations affs_dentry_operations = {
+ .d_hash = affs_hash_dentry,
+ .d_compare = affs_compare_dentry,
+};
+
+const struct dentry_operations affs_intl_dentry_operations = {
+ .d_hash = affs_intl_hash_dentry,
+ .d_compare = affs_intl_compare_dentry,
+};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d6384863192c..37532538e8ab 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait)
struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
lock_buffer(bh);
- secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
+ affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
affs_fix_checksum(sb, bh);
unlock_buffer(bh);
@@ -507,6 +507,7 @@ got_root:
return -ENOMEM;
}
+ sb->s_export_op = &affs_export_ops;
pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 51a241e09fbb..949f960337f5 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -252,7 +252,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
/* skip entries marked unused in the bitmap */
if (!(block->pagehdr.bitmap[offset / 8] &
(1 << (offset % 8)))) {
- _debug("ENT[%Zu.%u]: unused",
+ _debug("ENT[%zu.%u]: unused",
blkoff / sizeof(union afs_dir_block), offset);
if (offset >= curr)
ctx->pos = blkoff +
@@ -266,7 +266,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
sizeof(*block) -
offset * sizeof(union afs_dirent));
- _debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
+ _debug("ENT[%zu.%u]: %s %zu \"%s\"",
blkoff / sizeof(union afs_dir_block), offset,
(offset < curr ? "skip" : "fill"),
nlen, dire->u.name);
@@ -274,23 +274,23 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
/* work out where the next possible entry is */
for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) {
if (next >= AFS_DIRENT_PER_BLOCK) {
- _debug("ENT[%Zu.%u]:"
+ _debug("ENT[%zu.%u]:"
" %u travelled beyond end dir block"
- " (len %u/%Zu)",
+ " (len %u/%zu)",
blkoff / sizeof(union afs_dir_block),
offset, next, tmp, nlen);
return -EIO;
}
if (!(block->pagehdr.bitmap[next / 8] &
(1 << (next % 8)))) {
- _debug("ENT[%Zu.%u]:"
- " %u unmarked extension (len %u/%Zu)",
+ _debug("ENT[%zu.%u]:"
+ " %u unmarked extension (len %u/%zu)",
blkoff / sizeof(union afs_dir_block),
offset, next, tmp, nlen);
return -EIO;
}
- _debug("ENT[%Zu.%u]: ext %u/%Zu",
+ _debug("ENT[%zu.%u]: ext %u/%zu",
blkoff / sizeof(union afs_dir_block),
next, tmp, nlen);
next++;
diff --git a/fs/aio.c b/fs/aio.c
index 873b4ca82ccb..7e2ab9c8e39c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx)
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED, 0, &unused);
+ MAP_SHARED, 0, &unused, NULL);
up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
ctx->mmap_size = 0;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 6f48d670c941..806df746f1a9 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -38,8 +38,6 @@
* which have been left busy at at service shutdown.
*/
-#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
-
typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
struct autofs_dev_ioctl *);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 82e8f6edfb48..d79ced925861 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
+ ino->last_used = jiffies;
}
- ino->last_used = jiffies;
return status;
}
@@ -321,16 +321,21 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
- struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
+ if (new == dentry)
+ dput(new);
+ else {
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
+ }
}
return path->dentry;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 73031ec54a7b..77c30f15a02c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -989,7 +989,7 @@ struct block_device *bdget(dev_t dev)
bdev->bd_super = NULL;
bdev->bd_inode = inode;
bdev->bd_bdi = &noop_backing_dev_info;
- bdev->bd_block_size = (1 << inode->i_blkbits);
+ bdev->bd_block_size = i_blocksize(inode);
bdev->bd_part_count = 0;
bdev->bd_invalidated = 0;
inode->i_mode = S_IFBLK;
@@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
- if (bdev) {
+ if (bdev && !inode_unhashed(bdev->bd_inode)) {
bdgrab(bdev);
spin_unlock(&bdev_lock);
return bdev;
}
spin_unlock(&bdev_lock);
+ /*
+ * i_bdev references block device inode that was already shut down
+ * (corresponding device got removed). Remove the reference and look
+ * up block device inode again just in case new device got
+ * reestablished under the same device number.
+ */
+ if (bdev)
+ bd_forget(inode);
+
bdev = bdget(inode->i_rdev);
if (bdev) {
spin_lock(&bdev_lock);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8299601a3549..7699e16784d3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -956,8 +956,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
/*
* add all inline backrefs for bytenr to the list
*/
-static int __add_inline_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u64 bytenr,
+static int __add_inline_refs(struct btrfs_path *path, u64 bytenr,
int *info_level, struct list_head *prefs,
struct ref_root *ref_tree,
u64 *total_refs, u64 inum)
@@ -1284,7 +1283,7 @@ again:
*/
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -1354,7 +1353,7 @@ again:
if (key.objectid == bytenr &&
(key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY)) {
- ret = __add_inline_refs(fs_info, path, bytenr,
+ ret = __add_inline_refs(path, bytenr,
&info_level, &prefs,
ref_tree, &total_refs,
inum);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 1a8fa46ff87e..819a6d27218a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -224,16 +224,16 @@ static inline void btrfs_insert_inode_hash(struct inode *inode)
__insert_inode_hash(inode, h);
}
-static inline u64 btrfs_ino(struct inode *inode)
+static inline u64 btrfs_ino(struct btrfs_inode *inode)
{
- u64 ino = BTRFS_I(inode)->location.objectid;
+ u64 ino = inode->location.objectid;
/*
* !ino: btree_inode
* type == BTRFS_ROOT_ITEM_KEY: subvol dir
*/
- if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
- ino = inode->i_ino;
+ if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
+ ino = inode->vfs_inode.i_ino;
return ino;
}
@@ -248,23 +248,21 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (root == root->fs_info->tree_root &&
- btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
+ btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
-static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
- spin_lock(&BTRFS_I(inode)->lock);
- if (BTRFS_I(inode)->logged_trans == generation &&
- BTRFS_I(inode)->last_sub_trans <=
- BTRFS_I(inode)->last_log_commit &&
- BTRFS_I(inode)->last_sub_trans <=
- BTRFS_I(inode)->root->last_log_commit) {
+ spin_lock(&inode->lock);
+ if (inode->logged_trans == generation &&
+ inode->last_sub_trans <= inode->last_log_commit &&
+ inode->last_sub_trans <= inode->root->last_log_commit) {
/*
* After a ranged fsync we might have left some extent maps
* (that fall outside the fsync's range). So return false
@@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
* will be called and process those extent maps.
*/
smp_mb();
- if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
+ if (list_empty(&inode->extent_tree.modified_extents))
ret = 1;
}
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
return ret;
}
@@ -326,6 +324,24 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
&BTRFS_I(inode)->runtime_flags);
}
+static inline void btrfs_print_data_csum_error(struct inode *inode,
+ u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ /* Output minus objectid, which is more meaningful */
+ if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
+ btrfs_warn_rl(root->fs_info,
+ "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+ root->objectid, btrfs_ino(BTRFS_I(inode)),
+ logical_start, csum, csum_expected, mirror_num);
+ else
+ btrfs_warn_rl(root->fs_info,
+ "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
+ root->objectid, btrfs_ino(BTRFS_I(inode)),
+ logical_start, csum, csum_expected, mirror_num);
+}
+
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c4444d6f439f..903c32c9eb22 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -124,10 +124,8 @@ static int check_compressed_csum(struct inode *inode,
kunmap_atomic(kaddr);
if (csum != *cb_sum) {
- btrfs_info(BTRFS_I(inode)->root->fs_info,
- "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
- btrfs_ino(inode), disk_start, csum, *cb_sum,
- cb->mirror_num);
+ btrfs_print_data_csum_error(inode, disk_start, csum,
+ *cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a426dc822d4d..1192bc7d2ee7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -28,9 +28,9 @@
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *ins_key,
- struct btrfs_path *path, int data_size, int extend);
+static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *ins_key, struct btrfs_path *path,
+ int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct extent_buffer *dst,
@@ -426,7 +426,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
- tm = container_of(node, struct tree_mod_elem, node);
+ tm = rb_entry(node, struct tree_mod_elem, node);
if (tm->seq > min_seq)
continue;
rb_erase(node, tm_root);
@@ -460,7 +460,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
while (*new) {
- cur = container_of(*new, struct tree_mod_elem, node);
+ cur = rb_entry(*new, struct tree_mod_elem, node);
parent = *new;
if (cur->logical < tm->logical)
new = &((*new)->rb_left);
@@ -746,7 +746,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
- cur = container_of(node, struct tree_mod_elem, node);
+ cur = rb_entry(node, struct tree_mod_elem, node);
if (cur->logical < start) {
node = node->rb_left;
} else if (cur->logical > start) {
@@ -1074,7 +1074,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
}
- clean_tree_block(trans, fs_info, buf);
+ clean_tree_block(fs_info, buf);
*last_ref = 1;
}
return 0;
@@ -1326,7 +1326,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
next = rb_next(&tm->node);
if (!next)
break;
- tm = container_of(next, struct tree_mod_elem, node);
+ tm = rb_entry(next, struct tree_mod_elem, node);
if (tm->logical != first_tm->logical)
break;
}
@@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
/*
* compare two keys in a memcmp fashion
*/
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
+static int comp_keys(const struct btrfs_disk_key *disk,
+ const struct btrfs_key *k2)
{
struct btrfs_key k1;
@@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
/*
* same as comp_keys only with two btrfs_key's
*/
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
{
if (k1->objectid > k2->objectid)
return 1;
@@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
* slot may point to max if the key is bigger than all of the keys
*/
static noinline int generic_bin_search(struct extent_buffer *eb,
- unsigned long p,
- int item_size, struct btrfs_key *key,
+ unsigned long p, int item_size,
+ const struct btrfs_key *key,
int max, int *slot)
{
int low = 0;
@@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* simple bin_search frontend that does the right thing for
* leaves vs nodes
*/
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
if (level == 0)
@@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
slot);
}
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
return bin_search(eb, key, level, slot);
@@ -1937,7 +1938,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
path->nodes[level] = NULL;
- clean_tree_block(trans, fs_info, mid);
+ clean_tree_block(fs_info, mid);
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
@@ -1998,7 +1999,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- clean_tree_block(trans, fs_info, right);
+ clean_tree_block(fs_info, right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
@@ -2042,7 +2043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- clean_tree_block(trans, fs_info, mid);
+ clean_tree_block(fs_info, mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
@@ -2437,10 +2438,9 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
* reada. -EAGAIN is returned and the search must be repeated.
*/
static int
-read_block_for_search(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer **eb_ret, int level, int slot,
- struct btrfs_key *key, u64 time_seq)
+read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
+ struct extent_buffer **eb_ret, int level, int slot,
+ const struct btrfs_key *key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 blocknr;
@@ -2587,7 +2587,7 @@ done:
}
static void key_search_validate(struct extent_buffer *b,
- struct btrfs_key *key,
+ const struct btrfs_key *key,
int level)
{
#ifdef CONFIG_BTRFS_ASSERT
@@ -2606,7 +2606,7 @@ static void key_search_validate(struct extent_buffer *b,
#endif
}
-static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
int level, int *prev_cmp, int *slot)
{
if (*prev_cmp != 0) {
@@ -2668,9 +2668,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
* tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
* possible)
*/
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow)
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
@@ -2870,8 +2870,8 @@ cow_done:
goto done;
}
- err = read_block_for_search(trans, root, p,
- &b, level, slot, key, 0);
+ err = read_block_for_search(root, p, &b, level,
+ slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -2953,7 +2953,7 @@ done:
* The resulting path and return value will be set up as if we called
* btrfs_search_slot at that point in time with ins_len and cow both set to 0.
*/
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -3014,8 +3014,8 @@ again:
goto done;
}
- err = read_block_for_search(NULL, root, p, &b, level,
- slot, key, time_seq);
+ err = read_block_for_search(root, p, &b, level,
+ slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -3067,8 +3067,9 @@ done:
* < 0 on error
*/
int btrfs_search_slot_for_read(struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_path *p,
- int find_higher, int return_any)
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any)
{
int ret;
struct extent_buffer *leaf;
@@ -3166,7 +3167,7 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info,
*/
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key)
+ const struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *eb;
@@ -3594,8 +3595,7 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
* min slot controls the lowest index we're willing to push to the
* right. We'll push up to and including min_slot, but no lower
*/
-static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
@@ -3704,7 +3704,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
- clean_tree_block(trans, fs_info, left);
+ clean_tree_block(fs_info, left);
btrfs_mark_buffer_dirty(right);
@@ -3716,7 +3716,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, fs_info, path->nodes[0]);
+ clean_tree_block(fs_info, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3809,7 +3809,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}
- return __push_leaf_right(trans, fs_info, path, min_data_size, empty,
+ return __push_leaf_right(fs_info, path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
@@ -3825,8 +3825,7 @@ out_unlock:
* item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
* items
*/
-static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
@@ -3945,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
- clean_tree_block(trans, fs_info, right);
+ clean_tree_block(fs_info, right);
btrfs_item_key(right, &disk_key, 0);
fixup_low_keys(fs_info, path, &disk_key, 1);
@@ -4035,7 +4034,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, fs_info, path, min_data_size,
+ return __push_leaf_left(fs_info, path, min_data_size,
empty, left, free_space, right_nritems,
max_slot);
out:
@@ -4180,7 +4179,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
*/
static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_key *ins_key,
+ const struct btrfs_key *ins_key,
struct btrfs_path *path, int data_size,
int extend)
{
@@ -4412,10 +4411,9 @@ err:
return ret;
}
-static noinline int split_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+static noinline int split_item(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset)
{
struct extent_buffer *leaf;
@@ -4501,7 +4499,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset)
{
int ret;
@@ -4510,7 +4508,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- ret = split_item(trans, root->fs_info, path, new_key, split_offset);
+ ret = split_item(root->fs_info, path, new_key, split_offset);
return ret;
}
@@ -4525,7 +4523,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key)
+ const struct btrfs_key *new_key)
{
struct extent_buffer *leaf;
int ret;
@@ -4726,7 +4724,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
* that doesn't call btrfs_search_slot
*/
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4820,7 +4818,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
int ret = 0;
@@ -4851,9 +4849,9 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
* Given a key and some data, insert an item into the tree.
* This does all the path init required, making room in the tree if needed.
*/
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *cpu_key, void *data, u32
- data_size)
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *cpu_key, void *data,
+ u32 data_size)
{
int ret = 0;
struct btrfs_path *path;
@@ -5008,7 +5006,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
- clean_tree_block(trans, fs_info, leaf);
+ clean_tree_block(fs_info, leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
} else {
@@ -5243,7 +5241,7 @@ out:
static int tree_move_down(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- int *level, int root_level)
+ int *level)
{
struct extent_buffer *eb;
@@ -5258,8 +5256,7 @@ static int tree_move_down(struct btrfs_fs_info *fs_info,
return 0;
}
-static int tree_move_next_or_upnext(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static int tree_move_next_or_upnext(struct btrfs_path *path,
int *level, int root_level)
{
int ret = 0;
@@ -5298,10 +5295,9 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
int ret;
if (*level == 0 || !allow_down) {
- ret = tree_move_next_or_upnext(fs_info, path, level,
- root_level);
+ ret = tree_move_next_or_upnext(path, level, root_level);
} else {
- ret = tree_move_down(fs_info, path, level, root_level);
+ ret = tree_move_down(fs_info, path, level);
}
if (ret >= 0) {
if (*level == 0)
@@ -5784,8 +5780,8 @@ again:
next = c;
next_rw_lock = path->locks[level];
- ret = read_block_for_search(NULL, root, path, &next, level,
- slot, &key, 0);
+ ret = read_block_for_search(root, path, &next, level,
+ slot, &key);
if (ret == -EAGAIN)
goto again;
@@ -5834,8 +5830,8 @@ again:
if (!level)
break;
- ret = read_block_for_search(NULL, root, path, &next, level,
- 0, &key, 0);
+ ret = read_block_for_search(root, path, &next, level,
+ 0, &key);
if (ret == -EAGAIN)
goto again;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6a823719b6c5..105d4d43993e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -97,6 +97,14 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
+/*
+ * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+ */
+static inline u32 count_max_extents(u64 size)
+{
+ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+}
+
struct btrfs_mapping_tree {
struct extent_map_tree map_tree;
};
@@ -1953,7 +1961,7 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
- struct btrfs_disk_key *disk)
+ const struct btrfs_disk_key *disk)
{
cpu->offset = le64_to_cpu(disk->offset);
cpu->type = disk->type;
@@ -1961,7 +1969,7 @@ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
}
static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
- struct btrfs_key *cpu)
+ const struct btrfs_key *cpu)
{
disk->offset = cpu_to_le64(cpu->offset);
disk->type = cpu->type;
@@ -1993,8 +2001,7 @@ static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
btrfs_disk_key_to_cpu(key, &disk_key);
}
-
-static inline u8 btrfs_key_type(struct btrfs_key *key)
+static inline u8 btrfs_key_type(const struct btrfs_key *key)
{
return key->type;
}
@@ -2577,8 +2584,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_cross_ref_exist(struct btrfs_root *root,
u64 objectid, u64 offset, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
@@ -2587,10 +2593,11 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 parent,
- u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size);
+ struct btrfs_root *root,
+ u64 parent, u64 root_objectid,
+ const struct btrfs_disk_key *key,
+ int level, u64 hint,
+ u64 empty_size);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
@@ -2623,8 +2630,7 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2696,8 +2702,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
int nitems,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- u64 qgroup_reserved);
+ struct btrfs_block_rsv *rsv);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len);
@@ -2724,7 +2729,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -2750,9 +2755,9 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *info, u64 start, u64 end);
/* ctree.c */
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot);
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
@@ -2760,7 +2765,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid);
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- struct btrfs_key *new_key);
+ const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2802,22 +2807,23 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key,
+ const struct btrfs_key *new_key,
unsigned long split_offset);
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *new_key);
+ const struct btrfs_key *new_key);
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow);
-int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, struct btrfs_path *p,
+ int ins_len, int cow);
+int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_search_slot_for_read(struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_path *p,
- int find_higher, int return_any);
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, u64 *last_ret,
@@ -2840,19 +2846,20 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
}
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
+ const struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr);
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, void *data, u32 data_size);
+int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size, int nr);
+ const struct btrfs_key *cpu_key, u32 *data_size,
+ int nr);
static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct btrfs_key *key,
+ const struct btrfs_key *key,
u32 data_size)
{
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
@@ -2941,15 +2948,15 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
const char *name, int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key);
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item);
+ const struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ const struct btrfs_key *key,
+ struct btrfs_root_item *item);
int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_root_item *item);
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key);
int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
@@ -3119,7 +3126,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct inode *parent_inode, struct inode *inode,
@@ -3147,7 +3154,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+int btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
@@ -3447,7 +3454,8 @@ do { \
"BTRFS: Transaction aborted (error %d)\n", \
(errno)); \
} else { \
- pr_debug("BTRFS: Transaction aborted (error %d)\n", \
+ btrfs_debug((trans)->fs_info, \
+ "Transaction aborted (error %d)", \
(errno)); \
} \
} \
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 80982a83c9fd..f7a6ee5ccc80 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -72,14 +72,14 @@ static inline int btrfs_is_continuous_delayed_item(
return 0;
}
-static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(
+ struct btrfs_inode *btrfs_inode)
{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(btrfs_inode);
struct btrfs_delayed_node *node;
- node = ACCESS_ONCE(btrfs_inode->delayed_node);
+ node = READ_ONCE(btrfs_inode->delayed_node);
if (node) {
atomic_inc(&node->refs);
return node;
@@ -107,16 +107,15 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
+ struct btrfs_inode *btrfs_inode)
{
struct btrfs_delayed_node *node;
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(btrfs_inode);
int ret;
again:
- node = btrfs_get_delayed_node(inode);
+ node = btrfs_get_delayed_node(btrfs_inode);
if (node)
return node;
@@ -574,7 +573,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_delayed_node *node)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -603,13 +602,13 @@ static int btrfs_delayed_inode_reserve_metadata(
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
- spin_lock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
- &BTRFS_I(inode)->runtime_flags))
+ &inode->runtime_flags))
release = true;
else
src_rsv = NULL;
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_unlock(&inode->lock);
}
/*
@@ -1196,7 +1195,7 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1233,9 +1232,9 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return ret;
}
-int btrfs_commit_inode_delayed_inode(struct inode *inode)
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1288,15 +1287,15 @@ out:
return ret;
}
-void btrfs_remove_delayed_node(struct inode *inode)
+void btrfs_remove_delayed_node(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
- delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
+ delayed_node = READ_ONCE(inode->delayed_node);
if (!delayed_node)
return;
- BTRFS_I(inode)->delayed_node = NULL;
+ inode->delayed_node = NULL;
btrfs_release_delayed_node(delayed_node);
}
@@ -1434,7 +1433,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
const char *name, int name_len,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index)
{
@@ -1510,7 +1509,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct inode *dir, u64 index)
+ struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
@@ -1558,7 +1557,7 @@ end:
return ret;
}
-int btrfs_inode_delayed_dir_index_count(struct inode *inode)
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
@@ -1575,7 +1574,7 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return -EINVAL;
}
- BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
+ inode->index_cnt = delayed_node->index_cnt;
btrfs_release_delayed_node(delayed_node);
return 0;
}
@@ -1587,7 +1586,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
if (!delayed_node)
return false;
@@ -1776,7 +1775,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
- delayed_node = btrfs_get_delayed_node(inode);
+ delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
if (!delayed_node)
return -ENOENT;
@@ -1831,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *delayed_node;
int ret = 0;
- delayed_node = btrfs_get_or_create_delayed_node(inode);
+ delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@@ -1841,7 +1840,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
goto release_node;
}
- ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+ ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
delayed_node);
if (ret)
goto release_node;
@@ -1856,9 +1855,9 @@ release_node:
return ret;
}
-int btrfs_delayed_delete_inode_ref(struct inode *inode)
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_delayed_node *delayed_node;
/*
@@ -1933,7 +1932,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
mutex_unlock(&delayed_node->mutex);
}
-void btrfs_kill_delayed_inode_items(struct inode *inode)
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 8a2bf5e3e4cf..40327cc3b99a 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -101,15 +101,15 @@ static inline void btrfs_init_delayed_root(
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
const char *name, int name_len,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct inode *dir, u64 index);
+ struct btrfs_inode *dir, u64 index);
-int btrfs_inode_delayed_dir_index_count(struct inode *inode);
+int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
@@ -119,17 +119,17 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode);
+ struct btrfs_inode *inode);
/* Used for evicting the inode. */
-void btrfs_remove_delayed_node(struct inode *inode);
-void btrfs_kill_delayed_inode_items(struct inode *inode);
-int btrfs_commit_inode_delayed_inode(struct inode *inode);
+void btrfs_remove_delayed_node(struct btrfs_inode *inode);
+void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode);
+int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
-int btrfs_delayed_delete_inode_ref(struct inode *inode);
+int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ef724a5fc30e..6eb80952efb3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
- int action, int is_data)
+ int action, int is_data, int *qrecord_inserted_ret)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
+ int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
@@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
if(btrfs_qgroup_trace_extent_nolock(fs_info,
delayed_refs, qrecord))
kfree(qrecord);
+ else
+ qrecord_inserted = 1;
}
spin_lock_init(&head_ref->lock);
@@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
+ if (qrecord_inserted_ret)
+ *qrecord_inserted_ret = qrecord_inserted;
return head_ref;
}
@@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
+ int qrecord_inserted;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
- bytenr, num_bytes, 0, 0, action, 0);
+ bytenr, num_bytes, 0, 0, action, 0,
+ &qrecord_inserted);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock);
+ if (qrecord_inserted)
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
free_head_ref:
@@ -829,15 +838,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- struct btrfs_delayed_extent_op *extent_op)
+ u64 owner, u64 offset, u64 reserved, int action)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
+ int qrecord_inserted;
- BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
@@ -859,7 +867,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
}
}
- head_ref->extent_op = extent_op;
+ head_ref->extent_op = NULL;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
@@ -870,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, ref_root, reserved,
- action, 1);
+ action, 1, &qrecord_inserted);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action);
spin_unlock(&delayed_refs->lock);
+ if (qrecord_inserted)
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
}
@@ -899,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data);
+ extent_op->is_data, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
@@ -911,11 +921,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
* the head node if any where found, or NULL if not.
*/
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
- struct btrfs_delayed_ref_root *delayed_refs;
-
- delayed_refs = &trans->transaction->delayed_refs;
return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 50947b5a9152..0e537f98f1a1 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,8 +250,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- struct btrfs_delayed_extent_op *extent_op);
+ u64 owner, u64 offset, u64 reserved, int action);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
@@ -262,7 +261,8 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
+btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+ u64 bytenr);
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index b039fe0c751a..724504a2d7ac 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
struct btrfs_disk_key disk_key;
u32 data_size;
- key.objectid = btrfs_ino(dir);
+ key.objectid = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
@@ -174,8 +174,7 @@ second_insert:
btrfs_release_path(path);
ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
- name_len, dir, &disk_key, type,
- index);
+ name_len, BTRFS_I(dir), &disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 37a31b12bb0c..207db0270b15 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -64,8 +64,7 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only);
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -1005,7 +1004,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
return ret;
}
-static int check_async_write(struct inode *inode, unsigned long bio_flags)
+static int check_async_write(unsigned long bio_flags)
{
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
@@ -1021,7 +1020,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int async = check_async_write(inode, bio_flags);
+ int async = check_async_write(bio_flags);
int ret;
if (bio_op(bio) != REQ_OP_WRITE) {
@@ -1248,8 +1247,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
}
-void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
+void clean_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *buf)
{
if (btrfs_header_generation(buf) ==
@@ -2802,7 +2800,7 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
- ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ ret = btrfs_check_super_valid(fs_info);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
@@ -3411,7 +3409,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb,
- int do_barriers, int wait, int max_mirrors)
+ int wait, int max_mirrors)
{
struct buffer_head *bh;
int i;
@@ -3696,7 +3694,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
return num_tolerated_disk_barrier_failures;
}
-static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
{
struct list_head *head;
struct btrfs_device *dev;
@@ -3753,7 +3751,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
- ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
+ ret = write_dev_supers(dev, sb, 0, max_mirrors);
if (ret)
total_errors++;
}
@@ -3776,7 +3774,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
- ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
+ ret = write_dev_supers(dev, sb, 1, max_mirrors);
if (ret)
total_errors++;
}
@@ -3790,12 +3788,6 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
return 0;
}
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int max_mirrors)
-{
- return write_all_supers(fs_info, max_mirrors);
-}
-
/* Drop a fs root from the radix tree and free it. */
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root)
@@ -4122,8 +4114,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
}
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only)
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
{
struct btrfs_super_block *sb = fs_info->super_copy;
u64 nodesize = btrfs_super_nodesize(sb);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 44dcd9af6b7c..0be2d4fe705b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,14 +52,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
-void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
+void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
void close_ctree(struct btrfs_fs_info *fs_info);
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, int max_mirrors);
+int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 340d90751263..87144c9f9593 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -30,7 +30,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
- fid->objectid = btrfs_ino(inode);
+ fid->objectid = btrfs_ino(BTRFS_I(inode));
fid->root_objectid = BTRFS_I(inode)->root->objectid;
fid->gen = inode->i_generation;
@@ -166,13 +166,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
if (!path)
return ERR_PTR(-ENOMEM);
- if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = fs_info->tree_root;
} else {
- key.objectid = btrfs_ino(dir);
+ key.objectid = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
}
@@ -235,13 +235,10 @@ static int btrfs_get_name(struct dentry *parent, char *name,
int ret;
u64 ino;
- if (!dir || !inode)
- return -EINVAL;
-
if (!S_ISDIR(dir->i_mode))
return -EINVAL;
- ino = btrfs_ino(inode);
+ ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -255,7 +252,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
root = fs_info->tree_root;
} else {
key.objectid = ino;
- key.offset = btrfs_ino(dir);
+ key.offset = btrfs_ino(BTRFS_I(dir));
key.type = BTRFS_INODE_REF_KEY;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dcd2e798767e..c35b96633554 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -888,7 +888,7 @@ search_again:
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -1035,10 +1035,11 @@ out_free:
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 owner, u32 extra_size)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_extent_item *item;
struct btrfs_extent_item_v0 *ei0;
struct btrfs_extent_ref_v0 *ref0;
@@ -1092,7 +1093,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
return ret;
BUG_ON(ret); /* Corruption */
- btrfs_extend_item(root->fs_info, path, new_size);
+ btrfs_extend_item(fs_info, path, new_size);
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1151,12 +1152,13 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
}
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid,
u64 owner, u64 offset)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct btrfs_extent_data_ref *ref;
struct extent_buffer *leaf;
@@ -1238,12 +1240,13 @@ fail:
}
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid, u64 owner,
u64 offset, int refs_to_add)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
u32 size;
@@ -1317,7 +1320,7 @@ fail:
}
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int refs_to_drop, int *last_ref)
{
@@ -1354,7 +1357,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
num_refs -= refs_to_drop;
if (num_refs == 0) {
- ret = btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, fs_info->extent_root, path);
*last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
@@ -1416,11 +1419,12 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
}
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
{
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
int ret;
@@ -1449,7 +1453,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
}
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
@@ -1466,7 +1470,8 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
key.offset = root_objectid;
}
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+ ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+ path, &key, 0);
btrfs_release_path(path);
return ret;
}
@@ -1524,14 +1529,14 @@ static int find_next_key(struct btrfs_path *path, int level,
*/
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int insert)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -1614,7 +1619,7 @@ again:
err = -ENOENT;
goto out;
}
- ret = convert_extent_item_v0(trans, root, path, owner,
+ ret = convert_extent_item_v0(trans, fs_info, path, owner,
extra_size);
if (ret < 0) {
err = ret;
@@ -1716,7 +1721,7 @@ out:
* helper to add new inline back ref
*/
static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_root *root,
+void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
u64 parent, u64 root_objectid,
@@ -1739,7 +1744,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(root->fs_info, path, size);
+ btrfs_extend_item(fs_info, path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1777,7 +1782,7 @@ void setup_inline_extent_backref(struct btrfs_root *root,
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1785,7 +1790,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
{
int ret;
- ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
+ ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
bytenr, num_bytes, parent,
root_objectid, owner, offset, 0);
if (ret != -ENOENT)
@@ -1795,11 +1800,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
*ref_ret = NULL;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
- root_objectid);
+ ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
+ parent, root_objectid);
} else {
- ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
- root_objectid, owner, offset);
+ ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
+ parent, root_objectid, owner,
+ offset);
}
return ret;
}
@@ -1808,7 +1814,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_root *root,
+void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
@@ -1866,14 +1872,14 @@ void update_inline_extent_backref(struct btrfs_root *root,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(root->fs_info, path, item_size, 1);
+ btrfs_truncate_item(fs_info, path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
}
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner,
@@ -1883,15 +1889,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref;
int ret;
- ret = lookup_inline_extent_backref(trans, root, path, &iref,
+ ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
bytenr, num_bytes, parent,
root_objectid, owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(root, path, iref,
+ update_inline_extent_backref(fs_info, path, iref,
refs_to_add, extent_op, NULL);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(root, path, iref, parent,
+ setup_inline_extent_backref(fs_info, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1900,7 +1906,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
static int insert_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add)
@@ -1908,10 +1914,10 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
int ret;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
BUG_ON(refs_to_add != 1);
- ret = insert_tree_block_ref(trans, root, path, bytenr,
+ ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
parent, root_objectid);
} else {
- ret = insert_extent_data_ref(trans, root, path, bytenr,
+ ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
parent, root_objectid,
owner, offset, refs_to_add);
}
@@ -1919,7 +1925,7 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
}
static int remove_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data, int *last_ref)
@@ -1928,14 +1934,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- update_inline_extent_backref(root, path, iref,
+ update_inline_extent_backref(fs_info, path, iref,
-refs_to_drop, NULL, last_ref);
} else if (is_data) {
- ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+ ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
last_ref);
} else {
*last_ref = 1;
- ret = btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, fs_info->extent_root, path);
}
return ret;
}
@@ -2089,7 +2095,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes, parent, root_objectid,
owner, offset, 0,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF);
}
return ret;
}
@@ -2117,9 +2123,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset,
+ ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset,
refs_to_add, extent_op);
if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
@@ -2143,9 +2149,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* now insert the actual backref */
- ret = insert_extent_backref(trans, fs_info->extent_root,
- path, bytenr, parent, root_objectid,
- owner, offset, refs_to_add);
+ ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
+ root_objectid, owner, offset, refs_to_add);
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -2290,8 +2295,7 @@ again:
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
- ret = convert_extent_item_v0(trans, fs_info->extent_root,
- path, (u64)-1, 0);
+ ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
if (ret < 0) {
err = ret;
goto out;
@@ -3028,8 +3032,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_delayed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3037,11 +3040,16 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_transaction *cur_trans;
int ret = 0;
- delayed_refs = &trans->transaction->delayed_refs;
+ cur_trans = root->fs_info->running_transaction;
+ if (!cur_trans)
+ return 0;
+
+ delayed_refs = &cur_trans->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
return 0;
@@ -3090,8 +3098,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
return ret;
}
-static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
{
@@ -3162,9 +3169,8 @@ out:
return ret;
}
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr)
+int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
+ u64 bytenr)
{
struct btrfs_path *path;
int ret;
@@ -3175,12 +3181,12 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
return -ENOENT;
do {
- ret = check_committed_ref(trans, root, path, objectid,
+ ret = check_committed_ref(root, path, objectid,
offset, bytenr);
if (ret && ret != -ENOENT)
goto out;
- ret2 = check_delayed_ref(trans, root, path, objectid,
+ ret2 = check_delayed_ref(root, path, objectid,
offset, bytenr);
} while (ret2 == -EAGAIN);
@@ -3368,7 +3374,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
if (trans->aborted)
return 0;
again:
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
ret = PTR_ERR(inode);
btrfs_release_path(path);
@@ -3382,7 +3388,8 @@ again:
if (block_group->ro)
goto out_free;
- ret = create_free_space_inode(root, trans, block_group, path);
+ ret = create_free_space_inode(fs_info, trans, block_group,
+ path);
if (ret)
goto out_free;
goto again;
@@ -3424,7 +3431,7 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+ ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
if (ret)
goto out_put;
}
@@ -4119,6 +4126,15 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return ret;
}
+static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+ bool may_use_included)
+{
+ ASSERT(s_info);
+ return s_info->bytes_used + s_info->bytes_reserved +
+ s_info->bytes_pinned + s_info->bytes_readonly +
+ (may_use_included ? s_info->bytes_may_use : 0);
+}
+
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
@@ -4144,9 +4160,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
- used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
- data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
- data_sinfo->bytes_may_use;
+ used = btrfs_space_info_used(data_sinfo, true);
if (used + bytes > data_sinfo->total_bytes) {
struct btrfs_trans_handle *trans;
@@ -4421,9 +4435,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
- left = info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly -
- info->bytes_may_use;
+ left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
num_devs = get_profile_num_devs(fs_info, type);
@@ -4606,8 +4618,7 @@ static int can_overcommit(struct btrfs_root *root,
return 0;
profile = btrfs_get_alloc_profile(root, 0);
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly;
+ used = btrfs_space_info_used(space_info, false);
/*
* We only want to allow over committing if we have lots of actual space
@@ -4787,11 +4798,10 @@ skip_async:
* get us somewhere and then commit the transaction if it does. Otherwise it
* will return -ENOSPC.
*/
-static int may_commit_transaction(struct btrfs_root *root,
+static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 bytes, int force)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_trans_handle *trans;
@@ -4823,7 +4833,7 @@ static int may_commit_transaction(struct btrfs_root *root,
spin_unlock(&delayed_rsv->lock);
commit:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction(fs_info->fs_root);
if (IS_ERR(trans))
return -ENOSPC;
@@ -4837,11 +4847,11 @@ struct reserve_ticket {
wait_queue_head_t wait;
};
-static int flush_space(struct btrfs_root *root,
+static int flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
u64 orig_bytes, int state)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = fs_info->fs_root;
struct btrfs_trans_handle *trans;
int nr;
int ret = 0;
@@ -4881,7 +4891,8 @@ static int flush_space(struct btrfs_root *root,
ret = 0;
break;
case COMMIT_TRANS:
- ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+ ret = may_commit_transaction(fs_info, space_info,
+ orig_bytes, 0);
break;
default:
ret = -ENOSPC;
@@ -4993,8 +5004,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
struct reserve_ticket *ticket;
int ret;
- ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
- to_reclaim, flush_state);
+ ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+ flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
@@ -5049,8 +5060,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
spin_unlock(&space_info->lock);
do {
- flush_space(fs_info->fs_root, space_info, to_reclaim,
- to_reclaim, flush_state);
+ flush_space(fs_info, space_info, to_reclaim, to_reclaim,
+ flush_state);
flush_state++;
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
@@ -5135,9 +5146,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
spin_lock(&space_info->lock);
ret = -ENOSPC;
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = btrfs_space_info_used(space_info, true);
/*
* If we have enough space then hooray, make our reservation and carry
@@ -5630,9 +5639,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = min_t(u64, num_bytes, SZ_512M);
if (block_rsv->reserved < block_rsv->size) {
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
+ num_bytes = btrfs_space_info_used(sinfo, true);
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
num_bytes = min(num_bytes,
@@ -5756,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 1);
+ btrfs_ino(BTRFS_I(inode)), num_bytes, 1);
return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
@@ -5767,7 +5774,7 @@ void btrfs_orphan_release_metadata(struct inode *inode)
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
trace_btrfs_space_reservation(fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 0);
+ btrfs_ino(BTRFS_I(inode)), num_bytes, 0);
btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
}
@@ -5799,7 +5806,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
/* One for parent inode, two for dir entries */
num_bytes = 3 * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, num_bytes);
+ ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
if (ret)
return ret;
} else {
@@ -5824,8 +5831,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv,
- u64 qgroup_reserved)
+ struct btrfs_block_rsv *rsv)
{
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
}
@@ -5844,11 +5850,9 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
- unsigned num_extents = 0;
+ unsigned num_extents;
- num_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(num_bytes);
ASSERT(num_extents);
ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
BTRFS_I(inode)->outstanding_extents -= num_extents;
@@ -5927,7 +5931,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
u64 csum_bytes;
- unsigned nr_extents = 0;
+ unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
@@ -5960,9 +5964,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
- nr_extents = (unsigned)div64_u64(num_bytes +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ nr_extents = count_max_extents(num_bytes);
BTRFS_I(inode)->outstanding_extents += nr_extents;
nr_extents = 0;
@@ -5979,7 +5981,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
ret = btrfs_qgroup_reserve_meta(root,
- nr_extents * fs_info->nodesize);
+ nr_extents * fs_info->nodesize, true);
if (ret)
goto out_fail;
}
@@ -6005,7 +6007,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (to_reserve)
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_reserve, 1);
+ btrfs_ino(BTRFS_I(inode)), to_reserve, 1);
if (release_extra)
btrfs_block_rsv_release(fs_info, block_rsv,
btrfs_calc_trans_metadata_size(fs_info, 1));
@@ -6068,7 +6070,7 @@ out_fail:
if (to_free) {
btrfs_block_rsv_release(fs_info, block_rsv, to_free);
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
+ btrfs_ino(BTRFS_I(inode)), to_free, 0);
}
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
@@ -6104,7 +6106,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
return;
trace_btrfs_space_reservation(fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
+ btrfs_ino(BTRFS_I(inode)), to_free, 0);
btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
}
@@ -6561,8 +6563,7 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
spin_unlock(&space_info->lock);
return ret;
}
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
@@ -6845,7 +6846,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (is_data)
skinny_metadata = 0;
- ret = lookup_extent_backref(trans, extent_root, path, &iref,
+ ret = lookup_extent_backref(trans, info, path, &iref,
bytenr, num_bytes, parent,
root_objectid, owner_objectid,
owner_offset);
@@ -6877,8 +6878,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
#endif
if (!found_extent) {
BUG_ON(iref);
- ret = remove_extent_backref(trans, extent_root, path,
- NULL, refs_to_drop,
+ ret = remove_extent_backref(trans, info, path, NULL,
+ refs_to_drop,
is_data, &last_ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -6953,8 +6954,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
BUG_ON(found_extent || extent_slot != path->slots[0]);
- ret = convert_extent_item_v0(trans, extent_root, path,
- owner_objectid, 0);
+ ret = convert_extent_item_v0(trans, info, path, owner_objectid,
+ 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7021,7 +7022,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
}
if (found_extent) {
- ret = remove_extent_backref(trans, extent_root, path,
+ ret = remove_extent_backref(trans, info, path,
iref, refs_to_drop,
is_data, &last_ref);
if (ret) {
@@ -7095,7 +7096,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (!head)
goto out_delayed_unlock;
@@ -7244,7 +7245,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
num_bytes,
parent, root_objectid, owner,
offset, 0,
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF);
}
return ret;
}
@@ -7419,12 +7420,11 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
* If there is no suitable free space, we will record the max size of
* the free space extent currently.
*/
-static noinline int find_free_extent(struct btrfs_root *orig_root,
+static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 ram_bytes, u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
u64 flags, int delalloc)
{
- struct btrfs_fs_info *fs_info = orig_root->fs_info;
int ret = 0;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
@@ -7716,18 +7716,20 @@ unclustered_alloc:
last_ptr->fragmented = 1;
spin_unlock(&last_ptr->lock);
}
- spin_lock(&block_group->free_space_ctl->tree_lock);
- if (cached &&
- block_group->free_space_ctl->free_space <
- num_bytes + empty_cluster + empty_size) {
- if (block_group->free_space_ctl->free_space >
- max_extent_size)
- max_extent_size =
- block_group->free_space_ctl->free_space;
- spin_unlock(&block_group->free_space_ctl->tree_lock);
- goto loop;
+ if (cached) {
+ struct btrfs_free_space_ctl *ctl =
+ block_group->free_space_ctl;
+
+ spin_lock(&ctl->tree_lock);
+ if (ctl->free_space <
+ num_bytes + empty_cluster + empty_size) {
+ if (ctl->free_space > max_extent_size)
+ max_extent_size = ctl->free_space;
+ spin_unlock(&ctl->tree_lock);
+ goto loop;
+ }
+ spin_unlock(&ctl->tree_lock);
}
- spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size,
@@ -7908,9 +7910,8 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
spin_lock(&info->lock);
btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
info->flags,
- info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly -
- info->bytes_may_use, (info->full) ? "" : "not ");
+ info->total_bytes - btrfs_space_info_used(info, true),
+ info->full ? "" : "not ");
btrfs_info(fs_info,
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
@@ -7951,7 +7952,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < fs_info->sectorsize);
- ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+ ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
hint_byte, ins, flags, delalloc);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(fs_info, ins->objectid);
@@ -8194,8 +8195,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
- ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
- NULL);
+ ram_bytes, BTRFS_ADD_DELAYED_EXTENT);
return ret;
}
@@ -8256,7 +8256,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
- clean_tree_block(trans, fs_info, buf);
+ clean_tree_block(fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
@@ -8351,10 +8351,11 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
* returns the tree buffer or an ERR_PTR on error.
*/
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size)
+ struct btrfs_root *root,
+ u64 parent, u64 root_objectid,
+ const struct btrfs_disk_key *key,
+ int level, u64 hint,
+ u64 empty_size)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key ins;
@@ -8876,7 +8877,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_set_lock_blocking(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
- clean_tree_block(trans, fs_info, eb);
+ clean_tree_block(fs_info, eb);
}
if (eb == root->node) {
@@ -9346,8 +9347,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
- if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
- sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
+ if (btrfs_space_info_used(sinfo, true) + num_bytes +
min_allocable_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro++;
@@ -9360,17 +9360,16 @@ out:
return ret;
}
-int btrfs_inc_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *cache)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
again:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -9557,9 +9556,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
* all of the extents from this block group. If we can, we're good
*/
if ((space_info->total_bytes != block_group->key.offset) &&
- (space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- min_free < space_info->total_bytes)) {
+ (btrfs_space_info_used(space_info, false) + min_free <
+ space_info->total_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
@@ -10317,7 +10315,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* get the inode first so any iput calls done for the io_list
* aren't the final iput (no unlinks allowed now)
*/
- inode = lookup_free_space_inode(tree_root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
mutex_lock(&trans->transaction->cache_write_mutex);
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ac383a3a649..d15b5ddb6732 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -98,7 +98,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
"%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(inode), isize, start, end);
+ caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
}
}
#else
@@ -144,7 +144,7 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits,
if (!set && (state->state & bits) == 0)
return;
changeset->bytes_changed += state->end - state->start + 1;
- ret = ulist_add(changeset->range_changed, state->start, state->end,
+ ret = ulist_add(&changeset->range_changed, state->start, state->end,
GFP_ATOMIC);
/* ENOMEM */
BUG_ON(ret < 0);
@@ -226,6 +226,11 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
+ /*
+ * The given mask might be not appropriate for the slab allocator,
+ * drop the unsupported bits
+ */
+ mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
return state;
@@ -1549,33 +1554,24 @@ out:
return found;
}
+static int __process_pages_contig(struct address_space *mapping,
+ struct page *locked_page,
+ pgoff_t start_index, pgoff_t end_index,
+ unsigned long page_ops, pgoff_t *index_ret);
+
static noinline void __unlock_for_delalloc(struct inode *inode,
struct page *locked_page,
u64 start, u64 end)
{
- int ret;
- struct page *pages[16];
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
- int i;
+ ASSERT(locked_page);
if (index == locked_page->index && end_index == index)
return;
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
- if (pages[i] != locked_page)
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- cond_resched();
- }
+ __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
+ PAGE_UNLOCK, NULL);
}
static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1584,59 +1580,19 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 delalloc_end)
{
unsigned long index = delalloc_start >> PAGE_SHIFT;
- unsigned long start_index = index;
+ unsigned long index_ret = index;
unsigned long end_index = delalloc_end >> PAGE_SHIFT;
- unsigned long pages_locked = 0;
- struct page *pages[16];
- unsigned long nrpages;
int ret;
- int i;
- /* the caller is responsible for locking the start index */
+ ASSERT(locked_page);
if (index == locked_page->index && index == end_index)
return 0;
- /* skip the page at the start index */
- nrpages = end_index - index + 1;
- while (nrpages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nrpages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- ret = -EAGAIN;
- goto done;
- }
- /* now we have an array of pages, lock them all */
- for (i = 0; i < ret; i++) {
- /*
- * the caller is taking responsibility for
- * locked_page
- */
- if (pages[i] != locked_page) {
- lock_page(pages[i]);
- if (!PageDirty(pages[i]) ||
- pages[i]->mapping != inode->i_mapping) {
- ret = -EAGAIN;
- unlock_page(pages[i]);
- put_page(pages[i]);
- goto done;
- }
- }
- put_page(pages[i]);
- pages_locked++;
- }
- nrpages -= ret;
- index += ret;
- cond_resched();
- }
- ret = 0;
-done:
- if (ret && pages_locked) {
- __unlock_for_delalloc(inode, locked_page,
- delalloc_start,
- ((u64)(start_index + pages_locked - 1)) <<
- PAGE_SHIFT);
- }
+ ret = __process_pages_contig(inode->i_mapping, locked_page, index,
+ end_index, PAGE_LOCK, &index_ret);
+ if (ret == -EAGAIN)
+ __unlock_for_delalloc(inode, locked_page, delalloc_start,
+ (u64)index_ret << PAGE_SHIFT);
return ret;
}
@@ -1726,37 +1682,47 @@ out_failed:
return found;
}
-void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
- u64 delalloc_end, struct page *locked_page,
- unsigned clear_bits,
- unsigned long page_ops)
+static int __process_pages_contig(struct address_space *mapping,
+ struct page *locked_page,
+ pgoff_t start_index, pgoff_t end_index,
+ unsigned long page_ops, pgoff_t *index_ret)
{
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- int ret;
+ unsigned long nr_pages = end_index - start_index + 1;
+ unsigned long pages_locked = 0;
+ pgoff_t index = start_index;
struct page *pages[16];
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
+ unsigned ret;
+ int err = 0;
int i;
- clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
- if (page_ops == 0)
- return;
+ if (page_ops & PAGE_LOCK) {
+ ASSERT(page_ops == PAGE_LOCK);
+ ASSERT(index_ret && *index_ret == start_index);
+ }
if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
- mapping_set_error(inode->i_mapping, -EIO);
+ mapping_set_error(mapping, -EIO);
while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
+ ret = find_get_pages_contig(mapping, index,
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
+ if (ret == 0) {
+ /*
+ * Only if we're going to lock these pages,
+ * can we find nothing at @index.
+ */
+ ASSERT(page_ops & PAGE_LOCK);
+ return ret;
+ }
+ for (i = 0; i < ret; i++) {
if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
put_page(pages[i]);
+ pages_locked++;
continue;
}
if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1769,12 +1735,40 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
end_page_writeback(pages[i]);
if (page_ops & PAGE_UNLOCK)
unlock_page(pages[i]);
+ if (page_ops & PAGE_LOCK) {
+ lock_page(pages[i]);
+ if (!PageDirty(pages[i]) ||
+ pages[i]->mapping != mapping) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ err = -EAGAIN;
+ goto out;
+ }
+ }
put_page(pages[i]);
+ pages_locked++;
}
nr_pages -= ret;
index += ret;
cond_resched();
}
+out:
+ if (err && index_ret)
+ *index_ret = start_index + pages_locked - 1;
+ return err;
+}
+
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+ u64 delalloc_end, struct page *locked_page,
+ unsigned clear_bits,
+ unsigned long page_ops)
+{
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
+ NULL, GFP_NOFS);
+
+ __process_pages_contig(inode->i_mapping, locked_page,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ page_ops, NULL);
}
/*
@@ -2060,7 +2054,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
btrfs_info_rl_in_rcu(fs_info,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
- btrfs_ino(inode), start,
+ btrfs_ino(BTRFS_I(inode)), start,
rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
@@ -2765,7 +2759,6 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
size_t size, unsigned long offset,
struct block_device *bdev,
struct bio **bio_ret,
- unsigned long max_pages,
bio_end_io_t end_io_func,
int mirror_num,
unsigned long prev_bio_flags,
@@ -2931,7 +2924,6 @@ static int __do_readpage(struct extent_io_tree *tree,
}
}
while (cur <= end) {
- unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
bool force_bio_submit = false;
if (cur >= last_byte) {
@@ -3066,10 +3058,9 @@ static int __do_readpage(struct extent_io_tree *tree,
continue;
}
- pnr -= page->index;
ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
page, sector, disk_io_size, pg_offset,
- bdev, bio, pnr,
+ bdev, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
@@ -3210,7 +3201,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
-static void update_nr_written(struct page *page, struct writeback_control *wbc,
+static void update_nr_written(struct writeback_control *wbc,
unsigned long nr_written)
{
wbc->nr_to_write -= nr_written;
@@ -3330,7 +3321,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 block_start;
u64 iosize;
sector_t sector;
- struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
size_t pg_offset = 0;
@@ -3349,10 +3339,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
else
redirty_page_for_writepage(wbc, page);
- update_nr_written(page, wbc, nr_written);
+ update_nr_written(wbc, nr_written);
unlock_page(page);
- ret = 1;
- goto done_unlocked;
+ return 1;
}
}
@@ -3360,7 +3349,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
* we don't want to touch the inode after unlocking the page,
* so we update the mapping writeback index now
*/
- update_nr_written(page, wbc, nr_written + 1);
+ update_nr_written(wbc, nr_written + 1);
end = page_end;
if (i_size <= start) {
@@ -3374,7 +3363,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
while (cur <= end) {
u64 em_end;
- unsigned long max_nr;
if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook)
@@ -3431,8 +3419,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
continue;
}
- max_nr = (i_size >> PAGE_SHIFT) + 1;
-
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
@@ -3442,11 +3428,14 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
page, sector, iosize, pg_offset,
- bdev, &epd->bio, max_nr,
+ bdev, &epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
- if (ret)
+ if (ret) {
SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ }
cur = cur + iosize;
pg_offset += iosize;
@@ -3454,11 +3443,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
}
done:
*nr_ret = nr;
-
-done_unlocked:
-
- /* drop our reference on any cached states */
- free_extent_state(cached_state);
return ret;
}
@@ -3761,20 +3745,21 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
p, offset >> 9, PAGE_SIZE, 0, bdev,
- &epd->bio, -1,
+ &epd->bio,
end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
set_btree_ioerr(p);
- end_page_writeback(p);
+ if (PageWriteback(p))
+ end_page_writeback(p);
if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
end_extent_buffer_writeback(eb);
ret = -EIO;
break;
}
offset += PAGE_SIZE;
- update_nr_written(p, wbc, 1);
+ update_nr_written(wbc, 1);
unlock_page(p);
}
@@ -3926,8 +3911,7 @@ retry:
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
-static int extent_write_cache_pages(struct extent_io_tree *tree,
- struct address_space *mapping,
+static int extent_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
@@ -4168,8 +4152,7 @@ int extent_writepages(struct extent_io_tree *tree,
.bio_flags = 0,
};
- ret = extent_write_cache_pages(tree, mapping, wbc,
- __extent_writepage, &epd,
+ ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
flush_write_bio);
flush_epd_write_bio(&epd);
return ret;
@@ -4264,8 +4247,6 @@ static int try_release_extent_state(struct extent_map_tree *map,
EXTENT_IOBITS, 0, NULL))
ret = 0;
else {
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
/*
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
@@ -4410,8 +4391,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* lookup the last file extent. We're not using i_size here
* because there might be preallocation past i_size
*/
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
- 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), -1, 0);
if (ret < 0) {
btrfs_free_path(path);
return ret;
@@ -4426,7 +4407,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
found_type = found_key.type;
/* No extents, but there might be delalloc bits */
- if (found_key.objectid != btrfs_ino(inode) ||
+ if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
found_type != BTRFS_EXTENT_DATA_KEY) {
/* have to trust i_size as the end */
last = (u64)-1;
@@ -4535,8 +4516,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* lookup stuff.
*/
ret = btrfs_check_shared(trans, root->fs_info,
- root->objectid,
- btrfs_ino(inode), bytenr);
+ root->objectid,
+ btrfs_ino(BTRFS_I(inode)), bytenr);
if (trans)
btrfs_end_transaction(trans);
if (ret < 0)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 17f9ce479ed7..270d03be290e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -45,13 +45,14 @@
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
-/* these are flags for extent_clear_unlock_delalloc */
+/* these are flags for __process_pages_contig */
#define PAGE_UNLOCK (1 << 0)
#define PAGE_CLEAR_DIRTY (1 << 1)
#define PAGE_SET_WRITEBACK (1 << 2)
#define PAGE_END_WRITEBACK (1 << 3)
#define PAGE_SET_PRIVATE2 (1 << 4)
#define PAGE_SET_ERROR (1 << 5)
+#define PAGE_LOCK (1 << 6)
/*
* page->private values. Every page that is controlled by the extent
@@ -192,7 +193,7 @@ struct extent_changeset {
u64 bytes_changed;
/* Changed ranges */
- struct ulist *range_changed;
+ struct ulist range_changed;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index e97e322c28f0..f7b9a92ad56d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -255,7 +255,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
} else {
btrfs_info_rl(fs_info,
"no csum found for inode %llu start %llu",
- btrfs_ino(inode), offset);
+ btrfs_ino(BTRFS_I(inode)), offset);
}
item = NULL;
btrfs_release_path(path);
@@ -856,8 +856,8 @@ insert:
tmp = min(tmp, (next_offset - file_key.offset) >>
fs_info->sb->s_blocksize_bits);
- tmp = max((u64)1, tmp);
- tmp = min(tmp, (u64)MAX_CSUM_ITEMS(fs_info, csum_size));
+ tmp = max_t(u64, 1, tmp);
+ tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
ins_size = csum_size * tmp;
} else {
ins_size = csum_size;
@@ -977,7 +977,7 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
} else {
btrfs_err(fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, root %llu",
- type, btrfs_ino(inode), extent_start,
+ type, btrfs_ino(BTRFS_I(inode)), extent_start,
root->root_key.objectid);
}
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da215d05..c1d2a07205da 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -168,7 +168,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (!defrag)
return -ENOMEM;
- defrag->ino = btrfs_ino(inode);
+ defrag->ino = btrfs_ino(BTRFS_I(inode));
defrag->transid = transid;
defrag->root = root->root_key.objectid;
@@ -702,7 +702,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
struct btrfs_key new_key;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
u64 search_start = start;
u64 disk_bytenr = 0;
u64 num_bytes = 0;
@@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_slot = 0;
int recow;
int ret;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
- if (btrfs_inode_in_log(inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
(full_sync && BTRFS_I(inode)->last_trans <=
fs_info->last_trans_committed) ||
(!btrfs_have_ordered_extents_in_range(inode, start, len) &&
@@ -2203,7 +2203,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
return 0;
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(inode) ||
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
key.type != BTRFS_EXTENT_DATA_KEY)
return 0;
@@ -2237,7 +2237,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
if (btrfs_fs_incompat(fs_info, NO_HOLES))
goto out;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = offset;
@@ -2285,9 +2285,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
}
btrfs_release_path(path);
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
- 0, 0, end - offset, 0, end - offset,
- 0, 0, 0);
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
if (ret)
return ret;
@@ -2876,7 +2875,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret)
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, 1 << inode->i_blkbits,
+ range->len, i_blocksize(inode),
offset + len, &alloc_hint);
else
btrfs_free_reserved_data_space(inode, range->start,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7015892c9ee8..1a131f7d6c1b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -94,12 +94,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path)
{
struct inode *inode = NULL;
- struct btrfs_fs_info *fs_info = root->fs_info;
u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
spin_lock(&block_group->lock);
@@ -109,7 +108,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
if (inode)
return inode;
- inode = __lookup_free_space_inode(root, path,
+ inode = __lookup_free_space_inode(fs_info->tree_root, path,
block_group->key.objectid);
if (IS_ERR(inode))
return inode;
@@ -192,7 +191,7 @@ static int __create_free_space_inode(struct btrfs_root *root,
return 0;
}
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
@@ -200,11 +199,11 @@ int create_free_space_inode(struct btrfs_root *root,
int ret;
u64 ino;
- ret = btrfs_find_free_objectid(root, &ino);
+ ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
if (ret < 0)
return ret;
- return __create_free_space_inode(root, trans, path, ino,
+ return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
block_group->key.objectid);
}
@@ -227,21 +226,21 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
return ret;
}
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
- struct btrfs_path *path = btrfs_alloc_path();
bool locked = false;
- if (!path) {
- ret = -ENOMEM;
- goto fail;
- }
-
if (block_group) {
+ struct btrfs_path *path = btrfs_alloc_path();
+
+ if (!path) {
+ ret = -ENOMEM;
+ goto fail;
+ }
locked = true;
mutex_lock(&trans->transaction->cache_write_mutex);
if (!list_empty(&block_group->io_list)) {
@@ -258,8 +257,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_CLEAR;
spin_unlock(&block_group->lock);
+ btrfs_free_path(path);
}
- btrfs_free_path(path);
btrfs_i_size_write(inode, 0);
truncate_pagecache(inode, 0);
@@ -286,14 +285,14 @@ fail:
return ret;
}
-static int readahead_cache(struct inode *inode)
+static void readahead_cache(struct inode *inode)
{
struct file_ra_state *ra;
unsigned long last_index;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
- return -ENOMEM;
+ return;
file_ra_state_init(ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
@@ -301,8 +300,6 @@ static int readahead_cache(struct inode *inode)
page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
kfree(ra);
-
- return 0;
}
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
@@ -313,7 +310,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
/* Make sure we can fit our crcs into the first page */
@@ -730,9 +727,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
return ret;
- ret = readahead_cache(inode);
- if (ret)
- goto out;
+ readahead_cache(inode);
ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
if (ret)
@@ -828,7 +823,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_root *root = fs_info->tree_root;
struct inode *inode;
struct btrfs_path *path;
int ret = 0;
@@ -852,7 +846,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
path->search_commit_root = 1;
path->skip_locking = 1;
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
return 0;
@@ -1128,8 +1122,7 @@ cleanup_bitmap_list(struct list_head *bitmap_list)
static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
struct btrfs_io_ctl *io_ctl,
- struct extent_state **cached_state,
- struct list_head *bitmap_list)
+ struct extent_state **cached_state)
{
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
@@ -1225,8 +1218,6 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
* @ctl - the free space cache we are going to write out
* @block_group - the block_group for this cache if it belongs to a block_group
* @trans - the trans handle
- * @path - the path to use
- * @offset - the offset for the key we'll insert
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successful in writing the cache out,
@@ -1236,8 +1227,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group_cache *block_group,
struct btrfs_io_ctl *io_ctl,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 offset)
+ struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_state *cached_state = NULL;
@@ -1365,7 +1355,7 @@ out_nospc_locked:
mutex_unlock(&ctl->cache_writeout_mutex);
out_nospc:
- cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
+ cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
@@ -1378,7 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
- struct btrfs_root *root = fs_info->tree_root;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
@@ -1390,13 +1379,12 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
}
spin_unlock(&block_group->lock);
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode))
return 0;
- ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
- &block_group->io_ctl, trans,
- path, block_group->key.objectid);
+ ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
+ block_group, &block_group->io_ctl, trans);
if (ret) {
#ifdef DEBUG
btrfs_err(fs_info,
@@ -3543,8 +3531,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
return 0;
memset(&io_ctl, 0, sizeof(io_ctl));
- ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
- trans, path, 0);
+ ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
if (!ret) {
/*
* At this point writepages() didn't error out, so our metadata
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 6f3c025a2c6c..79eca4cabb1c 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -51,18 +51,17 @@ struct btrfs_free_space_op {
struct btrfs_io_ctl;
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
+struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_root *root,
+int create_free_space_inode(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
+int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index ff0c55337c2e..dd7fb22a955a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1269,7 +1269,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
list_del(&free_space_root->dirty_list);
btrfs_tree_lock(free_space_root->node);
- clean_tree_block(trans, fs_info, free_space_root->node);
+ clean_tree_block(fs_info, free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
0, 1);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 144b119ff43f..3bbb8f095953 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -467,7 +467,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
+ ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e861a063721..f02823f088c2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@ struct btrfs_dio_data {
u64 reserve;
u64 unsubmitted_oe_range_start;
u64 unsubmitted_oe_range_end;
+ int overwrite;
};
static const struct inode_operations btrfs_dir_inode_operations;
@@ -108,11 +109,11 @@ static noinline int cow_file_range(struct inode *inode,
u64 start, u64 end, u64 delalloc_end,
int *page_started, unsigned long *nr_written,
int unlock, struct btrfs_dedupe_hash *hash);
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
- u64 len, u64 orig_start,
- u64 block_start, u64 block_len,
- u64 orig_block_len, u64 ram_bytes,
- int type);
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+ u64 orig_start, u64 block_start,
+ u64 block_len, u64 orig_block_len,
+ u64 ram_bytes, int compress_type,
+ int type);
static int btrfs_dirty_inode(struct inode *inode);
@@ -166,7 +167,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_key key;
size_t datasize;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = start;
key.type = BTRFS_EXTENT_DATA_KEY;
@@ -388,6 +389,15 @@ static inline int inode_need_compress(struct inode *inode)
return 0;
}
+static inline void inode_should_defrag(struct inode *inode,
+ u64 start, u64 end, u64 num_bytes, u64 small_write)
+{
+ /* If this is a small write inside eof, kick off a defrag */
+ if (num_bytes < small_write &&
+ (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+ btrfs_add_inode_defrag(NULL, inode);
+}
+
/*
* we create compressed extents in two phases. The first
* phase compresses a range of pages that have already been
@@ -430,10 +440,7 @@ static noinline void compress_file_range(struct inode *inode,
int compress_type = fs_info->compress_type;
int redirty = 0;
- /* if this is a small write inside eof, kick off a defrag */
- if ((end - start + 1) < SZ_16K &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
+ inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
actual_end = min_t(u64, isize, end + 1);
again:
@@ -541,7 +548,7 @@ cont:
* to make an uncompressed inline extent.
*/
ret = cow_file_range_inline(root, inode, start, end,
- 0, 0, NULL);
+ 0, BTRFS_COMPRESS_NONE, NULL);
} else {
/* try making a compressed inline extent */
ret = cow_file_range_inline(root, inode, start, end,
@@ -690,7 +697,6 @@ static noinline void submit_compressed_extents(struct inode *inode,
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree;
int ret = 0;
@@ -778,46 +784,19 @@ retry:
* here we're doing allocation and writeback of the
* compressed pages
*/
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
-
- em = alloc_extent_map();
- if (!em) {
- ret = -ENOMEM;
- goto out_free_reserve;
- }
- em->start = async_extent->start;
- em->len = async_extent->ram_size;
- em->orig_start = em->start;
- em->mod_start = em->start;
- em->mod_len = em->len;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->orig_block_len = ins.offset;
- em->ram_bytes = async_extent->ram_size;
- em->bdev = fs_info->fs_devices->latest_bdev;
- em->compress_type = async_extent->compress_type;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->generation = -1;
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
- }
-
- if (ret)
+ em = create_io_em(inode, async_extent->start,
+ async_extent->ram_size, /* len */
+ async_extent->start, /* orig_start */
+ ins.objectid, /* block_start */
+ ins.offset, /* block_len */
+ ins.offset, /* orig_block_len */
+ async_extent->ram_size, /* ram_bytes */
+ async_extent->compress_type,
+ BTRFS_ORDERED_COMPRESSED);
+ if (IS_ERR(em))
+ /* ret value is not necessary due to void function */
goto out_free_reserve;
+ free_extent_map(em);
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
@@ -952,7 +931,6 @@ static noinline int cow_file_range(struct inode *inode,
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
if (btrfs_is_free_space_inode(inode)) {
@@ -965,15 +943,12 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
- /* if this is a small write inside eof, kick off defrag */
- if (num_bytes < SZ_64K &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
+ inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(root, inode, start, end, 0, 0,
- NULL);
+ ret = cow_file_range_inline(root, inode, start, end, 0,
+ BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) {
extent_clear_unlock_delalloc(inode, start, end,
delalloc_end, NULL,
@@ -1008,39 +983,18 @@ static noinline int cow_file_range(struct inode *inode,
if (ret < 0)
goto out_unlock;
- em = alloc_extent_map();
- if (!em) {
- ret = -ENOMEM;
- goto out_reserve;
- }
- em->start = start;
- em->orig_start = em->start;
ram_size = ins.offset;
- em->len = ins.offset;
- em->mod_start = em->start;
- em->mod_len = em->len;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->orig_block_len = ins.offset;
- em->ram_bytes = ram_size;
- em->bdev = fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- em->generation = -1;
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start,
- start + ram_size - 1, 0);
- }
- if (ret)
+ em = create_io_em(inode, start, ins.offset, /* len */
+ start, /* orig_start */
+ ins.objectid, /* block_start */
+ ins.offset, /* block_len */
+ ins.offset, /* orig_block_len */
+ ram_size, /* ram_bytes */
+ BTRFS_COMPRESS_NONE, /* compress_type */
+ BTRFS_ORDERED_REGULAR /* type */);
+ if (IS_ERR(em))
goto out_reserve;
+ free_extent_map(em);
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1164,7 +1118,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
- int limit = 10 * SZ_1M;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
@@ -1196,12 +1149,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
- if (atomic_read(&fs_info->async_delalloc_pages) > limit) {
- wait_event(fs_info->async_submit_wait,
- (atomic_read(&fs_info->async_delalloc_pages) <
- limit));
- }
-
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
@@ -1250,11 +1197,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
struct btrfs_key found_key;
+ struct extent_map *em;
u64 cow_start;
u64 cur_offset;
u64 extent_end;
@@ -1269,7 +1216,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int nocow;
int check_prev = 1;
bool nolock;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path) {
@@ -1286,30 +1233,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
nolock = btrfs_is_free_space_inode(inode);
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode, start, end, end,
- locked_page,
- EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, PAGE_UNLOCK |
- PAGE_CLEAR_DIRTY |
- PAGE_SET_WRITEBACK |
- PAGE_END_WRITEBACK);
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- trans->block_rsv = &fs_info->delalloc_block_rsv;
-
cow_start = (u64)-1;
cur_offset = start;
while (1) {
- ret = btrfs_lookup_file_extent(trans, root, path, ino,
+ ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
@@ -1382,7 +1309,7 @@ next_slot:
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
- if (btrfs_cross_ref_exist(trans, root, ino,
+ if (btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr))
goto out_check;
@@ -1455,35 +1382,28 @@ out_check:
}
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- struct extent_map *em;
- struct extent_map_tree *em_tree;
- em_tree = &BTRFS_I(inode)->extent_tree;
- em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
- em->start = cur_offset;
- em->orig_start = found_key.offset - extent_offset;
- em->len = num_bytes;
- em->block_len = num_bytes;
- em->block_start = disk_bytenr;
- em->orig_block_len = disk_num_bytes;
- em->ram_bytes = ram_bytes;
- em->bdev = fs_info->fs_devices->latest_bdev;
- em->mod_start = em->start;
- em->mod_len = em->len;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_FILLING, &em->flags);
- em->generation = -1;
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em, 1);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, em->start,
- em->start + em->len - 1, 0);
+ u64 orig_start = found_key.offset - extent_offset;
+
+ em = create_io_em(inode, cur_offset, num_bytes,
+ orig_start,
+ disk_bytenr, /* block_start */
+ num_bytes, /* block_len */
+ disk_num_bytes, /* orig_block_len */
+ ram_bytes, BTRFS_COMPRESS_NONE,
+ BTRFS_ORDERED_PREALLOC);
+ if (IS_ERR(em)) {
+ if (!nolock && nocow)
+ btrfs_end_write_no_snapshoting(root);
+ if (nocow)
+ btrfs_dec_nocow_writers(fs_info,
+ disk_bytenr);
+ ret = PTR_ERR(em);
+ goto error;
}
+ free_extent_map(em);
+ }
+
+ if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
type = BTRFS_ORDERED_PREALLOC;
} else {
type = BTRFS_ORDERED_NOCOW;
@@ -1534,10 +1454,6 @@ out_check:
}
error:
- err = btrfs_end_transaction(trans);
- if (!ret)
- ret = err;
-
if (ret && cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end, end,
locked_page, EXTENT_LOCKED |
@@ -1609,7 +1525,7 @@ static void btrfs_split_extent_hook(struct inode *inode,
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
- u64 num_extents;
+ u32 num_extents;
u64 new_size;
/*
@@ -1617,13 +1533,10 @@ static void btrfs_split_extent_hook(struct inode *inode,
* applies here, just in reverse.
*/
new_size = orig->end - split + 1;
- num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(new_size);
new_size = split - orig->start;
- num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
- if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(new_size);
+ if (count_max_extents(size) >= num_extents)
return;
}
@@ -1643,7 +1556,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *other)
{
u64 new_size, old_size;
- u64 num_extents;
+ u32 num_extents;
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
@@ -1681,14 +1594,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
* this case.
*/
old_size = other->end - other->start + 1;
- num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ num_extents = count_max_extents(old_size);
old_size = new->end - new->start + 1;
- num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
-
- if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ num_extents += count_max_extents(old_size);
+ if (count_max_extents(new_size) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
@@ -1797,8 +1706,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 len = state->end + 1 - state->start;
- u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
- BTRFS_MAX_EXTENT_SIZE);
+ u32 num_extents = count_max_extents(len);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1997,8 +1905,7 @@ out:
* at IO completion time based on sums calculated at bio submission time.
*/
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 file_offset,
- struct list_head *list)
+ struct inode *inode, struct list_head *list)
{
struct btrfs_ordered_sum *sum;
@@ -2161,7 +2068,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
goto out;
if (!extent_inserted) {
- ins.objectid = btrfs_ino(inode);
+ ins.objectid = btrfs_ino(BTRFS_I(inode));
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
@@ -2194,8 +2101,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
- btrfs_ino(inode), file_pos,
- ram_bytes, &ins);
+ btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins);
/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
@@ -2320,7 +2226,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
u64 num_bytes;
if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
- inum == btrfs_ino(inode))
+ inum == btrfs_ino(BTRFS_I(inode)))
return 0;
key.objectid = root_id;
@@ -2589,7 +2495,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
if (ret)
goto out_free_path;
again:
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
@@ -2768,7 +2674,7 @@ record_old_file_extents(struct inode *inode,
if (!path)
goto out_kfree;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = new->file_pos;
@@ -2803,7 +2709,7 @@ record_old_file_extents(struct inode *inode,
btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != btrfs_ino(inode))
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)))
break;
if (key.type != BTRFS_EXTENT_DATA_KEY)
break;
@@ -2993,8 +2899,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out_unlock;
}
- add_pending_csums(trans, inode, ordered_extent->file_offset,
- &ordered_extent->list);
+ add_pending_csums(trans, inode, &ordered_extent->list);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
@@ -3123,9 +3028,8 @@ static int __readpage_endio_check(struct inode *inode,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
- "csum failed ino %llu off %llu csum %u expected csum %u",
- btrfs_ino(inode), start, csum, csum_expected);
+ btrfs_print_data_csum_error(inode, start, csum, csum_expected,
+ io_bio->mirror_num);
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
@@ -3326,7 +3230,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
- ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
+ ret = btrfs_insert_orphan_item(trans, root,
+ btrfs_ino(BTRFS_I(inode)));
if (ret) {
atomic_dec(&root->orphan_inodes);
if (reserve) {
@@ -3382,7 +3287,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
atomic_dec(&root->orphan_inodes);
if (trans)
ret = btrfs_del_orphan_item(trans, root,
- btrfs_ino(inode));
+ btrfs_ino(BTRFS_I(inode)));
}
if (release_rsv)
@@ -3789,7 +3694,7 @@ cache_index:
goto cache_acl;
btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
- if (location.objectid != btrfs_ino(inode))
+ if (location.objectid != btrfs_ino(BTRFS_I(inode)))
goto cache_acl;
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3811,14 +3716,14 @@ cache_acl:
* any xattrs or acls
*/
maybe_acls = acls_after_inode_item(leaf, path->slots[0],
- btrfs_ino(inode), &first_xattr_slot);
+ btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
if (first_xattr_slot != -1) {
path->slots[0] = first_xattr_slot;
ret = btrfs_load_inode_props(inode, path);
if (ret)
btrfs_err(fs_info,
"error loading props for ino %llu (root %llu): %d",
- btrfs_ino(inode),
+ btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
btrfs_free_path(path);
@@ -3993,7 +3898,8 @@ noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
*/
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
const char *name, int name_len)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4040,10 +3946,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
* that we delay to delete it, and just do this deletion when
* we update the inode item.
*/
- if (BTRFS_I(inode)->dir_index) {
+ if (inode->dir_index) {
ret = btrfs_delayed_delete_inode_ref(inode);
if (!ret) {
- index = BTRFS_I(inode)->dir_index;
+ index = inode->dir_index;
goto skip_backref;
}
}
@@ -4064,15 +3970,15 @@ skip_backref:
goto err;
}
- ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
- inode, dir_ino);
+ ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+ dir_ino);
if (ret != 0 && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto err;
}
- ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
- dir, index);
+ ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+ index);
if (ret == -ENOENT)
ret = 0;
else if (ret)
@@ -4082,26 +3988,27 @@ err:
if (ret)
goto out;
- btrfs_i_size_write(dir, dir->i_size - name_len * 2);
- inode_inc_iversion(inode);
- inode_inc_iversion(dir);
- inode->i_ctime = dir->i_mtime =
- dir->i_ctime = current_time(inode);
- ret = btrfs_update_inode(trans, root, dir);
+ btrfs_i_size_write(&dir->vfs_inode,
+ dir->vfs_inode.i_size - name_len * 2);
+ inode_inc_iversion(&inode->vfs_inode);
+ inode_inc_iversion(&dir->vfs_inode);
+ inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
+ dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
return ret;
}
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len)
{
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
- drop_nlink(inode);
- ret = btrfs_update_inode(trans, root, inode);
+ drop_nlink(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
}
return ret;
}
@@ -4139,10 +4046,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
+ btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+ 0);
- ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
- dentry->d_name.name, dentry->d_name.len);
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
if (ret)
goto out;
@@ -4170,7 +4079,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_key key;
u64 index;
int ret;
- u64 dir_ino = btrfs_ino(dir);
+ u64 dir_ino = btrfs_ino(BTRFS_I(dir));
path = btrfs_alloc_path();
if (!path)
@@ -4222,7 +4131,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+ ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4249,14 +4158,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
- if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
return -EPERM;
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
return PTR_ERR(trans);
- if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+ if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, root, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
@@ -4271,8 +4180,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
/* now the directory is empty */
- err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
- dentry->d_name.name, dentry->d_name.len);
+ err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(d_inode(dentry)), dentry->d_name.name,
+ dentry->d_name.len);
if (!err) {
btrfs_i_size_write(inode, 0);
/*
@@ -4398,7 +4308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int extent_type = -1;
int ret;
int err = 0;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
u64 bytes_deleted = 0;
bool be_nice = 0;
bool should_throttle = 0;
@@ -4437,7 +4347,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* items.
*/
if (min_type == 0 && root == BTRFS_I(inode)->root)
- btrfs_kill_delayed_inode_items(inode);
+ btrfs_kill_delayed_inode_items(BTRFS_I(inode));
key.objectid = ino;
key.offset = (u64)-1;
@@ -4702,6 +4612,13 @@ error:
btrfs_free_path(path);
+ if (err == 0) {
+ /* only inline file may have last_size != new_size */
+ if (new_size >= fs_info->sectorsize ||
+ new_size > fs_info->max_inline)
+ ASSERT(last_size == new_size);
+ }
+
if (be_nice && bytes_deleted > SZ_32M) {
unsigned long updates = trans->delayed_ref_updates;
if (updates) {
@@ -4870,8 +4787,8 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
return ret;
}
- ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
- 0, 0, len, 0, len, 0, 0, 0);
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
+ offset, 0, 0, len, 0, len, 0, 0, 0);
if (ret)
btrfs_abort_transaction(trans, ret);
else
@@ -5087,6 +5004,13 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (ret && inode->i_nlink) {
int err;
+ /* To get a stable disk_i_size */
+ err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (err) {
+ btrfs_orphan_del(NULL, inode);
+ return err;
+ }
+
/*
* failed to truncate, disk_i_size is only adjusted down
* as we remove extents, so it should represent the true
@@ -5282,7 +5206,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
- ret = btrfs_commit_inode_delayed_inode(inode);
+ ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
if (ret) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
@@ -5402,12 +5326,12 @@ void btrfs_evict_inode(struct inode *inode)
trans->block_rsv = &fs_info->trans_block_rsv;
if (!(root == fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
- btrfs_return_ino(root, btrfs_ino(inode));
+ btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
no_delete:
- btrfs_remove_delayed_node(inode);
+ btrfs_remove_delayed_node(BTRFS_I(inode));
clear_inode(inode);
}
@@ -5429,8 +5353,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
if (!path)
return -ENOMEM;
- di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
- namelen, 0);
+ di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
+ name, namelen, 0);
if (IS_ERR(di))
ret = PTR_ERR(di);
@@ -5485,7 +5409,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
- if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
+ if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
goto out;
@@ -5520,7 +5444,7 @@ static void inode_tree_add(struct inode *inode)
struct rb_node **p;
struct rb_node *parent;
struct rb_node *new = &BTRFS_I(inode)->rb_node;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
if (inode_unhashed(inode))
return;
@@ -5531,9 +5455,9 @@ static void inode_tree_add(struct inode *inode)
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
- if (ino < btrfs_ino(&entry->vfs_inode))
+ if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
p = &parent->rb_left;
- else if (ino > btrfs_ino(&entry->vfs_inode))
+ else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
@@ -5593,9 +5517,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(&entry->vfs_inode))
+ if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
+ else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
node = node->rb_right;
else
break;
@@ -5603,7 +5527,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+ if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
node = prev;
break;
}
@@ -5612,7 +5536,7 @@ again:
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
+ objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
@@ -5796,7 +5720,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
- if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return 1;
}
return 0;
@@ -5865,7 +5789,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
@@ -6062,7 +5986,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
struct extent_buffer *leaf;
int ret;
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = (u64)-1;
@@ -6094,7 +6018,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != btrfs_ino(inode) ||
+ if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
BTRFS_I(inode)->index_cnt = 2;
goto out;
@@ -6115,7 +6039,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
int ret = 0;
if (BTRFS_I(dir)->index_cnt == (u64)-1) {
- ret = btrfs_inode_delayed_dir_index_count(dir);
+ ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir));
if (ret) {
ret = btrfs_set_inode_index_count(dir);
if (ret)
@@ -6294,7 +6218,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret)
btrfs_err(fs_info,
"error inheriting props for ino %llu (root %llu): %d",
- btrfs_ino(inode), root->root_key.objectid, ret);
+ btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
return inode;
@@ -6327,8 +6251,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- u64 ino = btrfs_ino(inode);
- u64 parent_ino = btrfs_ino(parent_inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
+ u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode));
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
@@ -6427,8 +6351,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -6499,8 +6423,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -6609,7 +6533,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
d_instantiate(dentry, inode);
- btrfs_log_new_name(trans, inode, NULL, parent);
+ btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
btrfs_balance_delayed_items(fs_info);
@@ -6649,8 +6573,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFDIR | mode, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_fail;
@@ -6810,7 +6734,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int err = 0;
u64 extent_start = 0;
u64 extent_end = 0;
- u64 objectid = btrfs_ino(inode);
+ u64 objectid = btrfs_ino(BTRFS_I(inode));
u32 found_type;
struct btrfs_path *path = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -7068,7 +6992,7 @@ insert:
write_unlock(&em_tree->lock);
out:
- trace_btrfs_get_extent(root, inode, em);
+ trace_btrfs_get_extent(root, BTRFS_I(inode), em);
btrfs_free_path(path);
if (trans) {
@@ -7225,9 +7149,11 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
int ret;
if (type != BTRFS_ORDERED_NOCOW) {
- em = create_pinned_em(inode, start, len, orig_start,
- block_start, block_len, orig_block_len,
- ram_bytes, type);
+ em = create_io_em(inode, start, len, orig_start,
+ block_start, block_len, orig_block_len,
+ ram_bytes,
+ BTRFS_COMPRESS_NONE, /* compress_type */
+ type);
if (IS_ERR(em))
goto out;
}
@@ -7264,7 +7190,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
em = btrfs_create_dio_extent(inode, start, ins.offset, start,
ins.objectid, ins.offset, ins.offset,
- ins.offset, 0);
+ ins.offset, BTRFS_ORDERED_REGULAR);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(em))
btrfs_free_reserved_extent(fs_info, ins.objectid,
@@ -7282,7 +7208,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *ram_bytes)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_trans_handle *trans;
struct btrfs_path *path;
int ret;
struct extent_buffer *leaf;
@@ -7302,8 +7227,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
if (!path)
return -ENOMEM;
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
- offset, 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), offset, 0);
if (ret < 0)
goto out;
@@ -7319,7 +7244,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
ret = 0;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(inode) ||
+ if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
key.type != BTRFS_EXTENT_DATA_KEY) {
/* not our file or wrong item type, must cow */
goto out;
@@ -7385,15 +7310,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
* look for other files referencing this extent, if we
* find any we must cow
*/
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = 0;
- goto out;
- }
- ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+ ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
key.offset - backref_offset, disk_bytenr);
- btrfs_end_transaction(trans);
if (ret) {
ret = 0;
goto out;
@@ -7570,17 +7489,23 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
return ret;
}
-static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
- u64 len, u64 orig_start,
- u64 block_start, u64 block_len,
- u64 orig_block_len, u64 ram_bytes,
- int type)
+/* The callers of this must take lock_extent() */
+static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
+ u64 orig_start, u64 block_start,
+ u64 block_len, u64 orig_block_len,
+ u64 ram_bytes, int compress_type,
+ int type)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
+ ASSERT(type == BTRFS_ORDERED_PREALLOC ||
+ type == BTRFS_ORDERED_COMPRESSED ||
+ type == BTRFS_ORDERED_NOCOW ||
+ type == BTRFS_ORDERED_REGULAR);
+
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map();
if (!em)
@@ -7588,8 +7513,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->start = start;
em->orig_start = orig_start;
- em->mod_start = start;
- em->mod_len = len;
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
@@ -7598,8 +7521,12 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->ram_bytes = ram_bytes;
em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
- if (type == BTRFS_ORDERED_PREALLOC)
+ if (type == BTRFS_ORDERED_PREALLOC) {
set_bit(EXTENT_FLAG_FILLING, &em->flags);
+ } else if (type == BTRFS_ORDERED_COMPRESSED) {
+ set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ }
do {
btrfs_drop_extent_cache(inode, em->start,
@@ -7607,6 +7534,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
+ /*
+ * The caller has taken lock_extent(), who could race with us
+ * to add em?
+ */
} while (ret == -EEXIST);
if (ret) {
@@ -7614,6 +7545,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return ERR_PTR(ret);
}
+ /* em got 2 refs now, callers needs to do free_extent_map once. */
return em;
}
@@ -7621,10 +7553,8 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
{
- unsigned num_extents;
+ unsigned num_extents = count_max_extents(len);
- num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
@@ -7804,7 +7734,7 @@ unlock:
* Need to update the i_size under the extent lock so buffered
* readers will get the updated i_size when we unlock.
*/
- if (start + len > i_size_read(inode))
+ if (!dio_data->overwrite && start + len > i_size_read(inode))
i_size_write(inode, start + len);
adjust_dio_outstanding_extents(inode, dio_data, len);
@@ -8254,7 +8184,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
"direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf,
+ btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
+ bio->bi_opf,
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
@@ -8679,15 +8610,14 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
+ dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
goto out;
- dio_data.outstanding_extents = div64_u64(count +
- BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE);
+ dio_data.outstanding_extents = count_max_extents(count);
/*
* We need to know how many extents we reserved so that we can
@@ -8831,7 +8761,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
if (PageWriteback(page) || PageDirty(page))
return 0;
- return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
+ return __btrfs_releasepage(page, gfp_flags);
}
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
@@ -8964,10 +8894,10 @@ again:
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int btrfs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
@@ -9000,7 +8930,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = btrfs_delalloc_reserve_space(inode, page_start,
reserved_space);
if (!ret) {
- ret = file_update_time(vma->vm_file);
+ ret = file_update_time(vmf->vma->vm_file);
reserved = 1;
}
if (ret) {
@@ -9032,7 +8962,7 @@ again:
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
- ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -9056,11 +8986,11 @@ again:
}
/*
- * XXX - page_mkwrite gets called every time the page is dirtied, even
- * if it was already dirty, so for space accounting reasons we need to
- * clear any delalloc bits for the range we are fixing to save. There
- * is probably a better way to do this, but for now keep consistent with
- * prepare_pages in the normal write path.
+ * page_mkwrite gets called when the page is firstly dirtied after it's
+ * faulted in, but write(2) could also dirty a page and set delalloc
+ * bits, thus in this case for space account reason, we still need to
+ * clear any delalloc bits within this page range since we have to
+ * reserve data&meta space before lock_page() (see above comments).
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -9384,7 +9314,7 @@ void btrfs_destroy_inode(struct inode *inode)
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(fs_info, "inode %llu still on the orphan list",
- btrfs_ino(inode));
+ btrfs_ino(BTRFS_I(inode)));
atomic_dec(&root->orphan_inodes);
}
@@ -9513,8 +9443,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct inode *old_inode = old_dentry->d_inode;
struct timespec ctime = current_time(old_inode);
struct dentry *parent;
- u64 old_ino = btrfs_ino(old_inode);
- u64 new_ino = btrfs_ino(new_inode);
+ u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+ u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
u64 root_objectid;
@@ -9571,7 +9501,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len,
old_ino,
- btrfs_ino(new_dir), old_idx);
+ btrfs_ino(BTRFS_I(new_dir)),
+ old_idx);
if (ret)
goto out_fail;
}
@@ -9587,7 +9518,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len,
new_ino,
- btrfs_ino(old_dir), new_idx);
+ btrfs_ino(BTRFS_I(old_dir)),
+ new_idx);
if (ret)
goto out_fail;
}
@@ -9603,8 +9535,10 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_inode->i_ctime = ctime;
if (old_dentry->d_parent != new_dentry->d_parent) {
- btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
- btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+ BTRFS_I(old_inode), 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
+ BTRFS_I(new_inode), 1);
}
/* src is a subvolume */
@@ -9615,8 +9549,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else { /* src is an inode */
- ret = __btrfs_unlink_inode(trans, root, old_dir,
- old_dentry->d_inode,
+ ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+ BTRFS_I(old_dentry->d_inode),
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
@@ -9635,8 +9569,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_dentry->d_name.name,
new_dentry->d_name.len);
} else { /* dest is an inode */
- ret = __btrfs_unlink_inode(trans, dest, new_dir,
- new_dentry->d_inode,
+ ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+ BTRFS_I(new_dentry->d_inode),
new_dentry->d_name.name,
new_dentry->d_name.len);
if (!ret)
@@ -9670,13 +9604,15 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (root_log_pinned) {
parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ parent);
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
parent = old_dentry->d_parent;
- btrfs_log_new_name(trans, new_inode, new_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
+ parent);
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
@@ -9693,11 +9629,11 @@ out_fail:
* allow the tasks to sync it.
*/
if (ret && (root_log_pinned || dest_log_pinned)) {
- if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
- btrfs_inode_in_log(new_dir, fs_info->generation) ||
- btrfs_inode_in_log(old_inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
- btrfs_inode_in_log(new_inode, fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
if (root_log_pinned) {
@@ -9736,7 +9672,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
inode = btrfs_new_inode(trans, root, dir,
dentry->d_name.name,
dentry->d_name.len,
- btrfs_ino(dir),
+ btrfs_ino(BTRFS_I(dir)),
objectid,
S_IFCHR | WHITEOUT_MODE,
&index);
@@ -9784,10 +9720,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
u64 index = 0;
u64 root_objectid;
int ret;
- u64 old_ino = btrfs_ino(old_inode);
+ u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
- if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
/* we only allow rename subvolume link between subvolumes */
@@ -9795,7 +9731,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return -EXDEV;
if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
- (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
+ (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
return -ENOTEMPTY;
if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -9870,7 +9806,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.name,
new_dentry->d_name.len,
old_ino,
- btrfs_ino(new_dir), index);
+ btrfs_ino(BTRFS_I(new_dir)), index);
if (ret)
goto out_fail;
}
@@ -9883,7 +9819,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = current_time(old_dir);
if (old_dentry->d_parent != new_dentry->d_parent)
- btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+ btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
+ BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
@@ -9891,8 +9828,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else {
- ret = __btrfs_unlink_inode(trans, root, old_dir,
- d_inode(old_dentry),
+ ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
+ BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
@@ -9906,7 +9843,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode) {
inode_inc_iversion(new_inode);
new_inode->i_ctime = current_time(new_inode);
- if (unlikely(btrfs_ino(new_inode) ==
+ if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -9915,8 +9852,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.len);
BUG_ON(new_inode->i_nlink == 0);
} else {
- ret = btrfs_unlink_inode(trans, dest, new_dir,
- d_inode(new_dentry),
+ ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+ BTRFS_I(d_inode(new_dentry)),
new_dentry->d_name.name,
new_dentry->d_name.len);
}
@@ -9942,7 +9879,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (log_pinned) {
struct dentry *parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+ parent);
btrfs_end_log_trans(root);
log_pinned = false;
}
@@ -9969,11 +9907,11 @@ out_fail:
* allow the tasks to sync it.
*/
if (ret && log_pinned) {
- if (btrfs_inode_in_log(old_dir, fs_info->generation) ||
- btrfs_inode_in_log(new_dir, fs_info->generation) ||
- btrfs_inode_in_log(old_inode, fs_info->generation) ||
+ if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
+ btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
(new_inode &&
- btrfs_inode_in_log(new_inode, fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
btrfs_set_log_full_commit(fs_info, trans);
btrfs_end_log_trans(root);
@@ -10237,8 +10175,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFLNK|S_IRWXUGO, &index);
+ dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
+ objectid, S_IFLNK|S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out_unlock;
@@ -10264,7 +10202,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = -ENOMEM;
goto out_unlock_inode;
}
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = 0;
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -10517,7 +10455,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out;
inode = btrfs_new_inode(trans, root, dir, NULL, 0,
- btrfs_ino(dir), objectid, mode, &index);
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
inode = NULL;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 21e51b0ba188..d8539979b44f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -395,7 +395,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
q = bdev_get_queue(device->bdev);
if (blk_queue_discard(q)) {
num_devices++;
- minlen = min((u64)q->limits.discard_granularity,
+ minlen = min_t(u64, q->limits.discard_granularity,
minlen);
}
}
@@ -487,8 +487,7 @@ static noinline int create_subvol(struct inode *dir,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(fs_info, &block_rsv,
- qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
goto fail_free;
}
trans->block_rsv = &block_rsv;
@@ -601,7 +600,7 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_add_root_ref(trans, fs_info,
objectid, root->root_key.objectid,
- btrfs_ino(dir), index, name, namelen);
+ btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
@@ -613,7 +612,7 @@ fail:
kfree(root_item);
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
if (async_transid) {
*async_transid = trans->transid;
@@ -657,7 +656,7 @@ static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
- struct dentry *dentry, char *name, int namelen,
+ struct dentry *dentry,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
@@ -670,12 +669,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return -EINVAL;
- pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+ pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
if (!pending_snapshot)
return -ENOMEM;
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
- GFP_NOFS);
+ GFP_KERNEL);
pending_snapshot->path = btrfs_alloc_path();
if (!pending_snapshot->root_item || !pending_snapshot->path) {
ret = -ENOMEM;
@@ -753,9 +752,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
d_instantiate(dentry, inode);
ret = 0;
fail:
- btrfs_subvolume_release_metadata(fs_info,
- &pending_snapshot->block_rsv,
- pending_snapshot->qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (atomic_dec_and_test(&root->will_be_snapshoted))
wake_up_atomic_t(&root->will_be_snapshoted);
@@ -874,7 +871,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dir, dentry, name, namelen,
+ error = create_snapshot(snap_src, dir, dentry,
async_transid, readonly, inherit);
} else {
error = create_subvol(dir, dentry, name, namelen,
@@ -941,7 +938,7 @@ static int find_new_extents(struct btrfs_root *root,
struct btrfs_file_extent_item *extent;
int type;
int ret;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -1780,7 +1777,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
int ret = 0;
u64 flags = 0;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
return -EINVAL;
down_read(&fs_info->subvol_sem);
@@ -1812,7 +1809,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (ret)
goto out;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out_drop_write;
}
@@ -2446,7 +2443,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out_dput;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
@@ -2497,7 +2494,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- btrfs_record_snapshot_destroy(trans, dir);
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
ret = btrfs_unlink_subvol(trans, root, dir,
dest->root_key.objectid,
@@ -2555,7 +2552,7 @@ out_end_trans:
err = ret;
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved);
+ btrfs_subvolume_release_metadata(fs_info, &block_rsv);
out_up_write:
up_write(&fs_info->subvol_sem);
if (err) {
@@ -2613,9 +2610,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
ret = btrfs_defrag_root(root);
- if (ret)
- goto out;
- ret = btrfs_defrag_root(root->fs_info->extent_root);
break;
case S_IFREG:
if (!(file->f_mode & FMODE_WRITE)) {
@@ -3047,11 +3041,21 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
cmp->src_pages = src_pgarr;
cmp->dst_pages = dst_pgarr;
- ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+ /*
+ * If deduping ranges in the same inode, locking rules make it mandatory
+ * to always lock pages in ascending order to avoid deadlocks with
+ * concurrent tasks (such as starting writeback/delalloc).
+ */
+ if (src == dst && dst_loff < loff) {
+ swap(src_pgarr, dst_pgarr);
+ swap(loff, dst_loff);
+ }
+
+ ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
if (ret)
goto out;
- ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+ ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
out:
if (ret)
@@ -3059,8 +3063,7 @@ out:
return 0;
}
-static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
- u64 dst_loff, u64 len, struct cmp_pages *cmp)
+static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
{
int ret = 0;
int i;
@@ -3128,26 +3131,27 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
int ret;
u64 len = olen;
struct cmp_pages cmp;
- int same_inode = 0;
+ bool same_inode = (src == dst);
u64 same_lock_start = 0;
u64 same_lock_len = 0;
- if (src == dst)
- same_inode = 1;
-
if (len == 0)
return 0;
- if (same_inode) {
+ if (same_inode)
inode_lock(src);
+ else
+ btrfs_double_inode_lock(src, dst);
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
- ret = extent_same_check_offsets(src, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
+ ret = extent_same_check_offsets(src, loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+
+ ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+ if (ret)
+ goto out_unlock;
+ if (same_inode) {
/*
* Single inode case wants the same checks, except we
* don't want our length pushed out past i_size as
@@ -3175,16 +3179,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
same_lock_start = min_t(u64, loff, dst_loff);
same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
- } else {
- btrfs_double_inode_lock(src, dst);
-
- ret = extent_same_check_offsets(src, loff, &len, olen);
- if (ret)
- goto out_unlock;
-
- ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
- if (ret)
- goto out_unlock;
}
/* don't make the dst file partly checksummed */
@@ -3236,7 +3230,7 @@ again:
}
/* pass original length for comparison so we stay within i_size */
- ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
+ ret = btrfs_cmp_data(olen, &cmp);
if (ret == 0)
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
@@ -3399,8 +3393,7 @@ static void clone_update_extent_map(struct inode *inode,
* data into the destination inode's inline extent if the later is greater then
* the former.
*/
-static int clone_copy_inline_extent(struct inode *src,
- struct inode *dst,
+static int clone_copy_inline_extent(struct inode *dst,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_key *new_key,
@@ -3420,7 +3413,7 @@ static int clone_copy_inline_extent(struct inode *src,
if (new_key->offset > 0)
return -EOPNOTSUPP;
- key.objectid = btrfs_ino(dst);
+ key.objectid = btrfs_ino(BTRFS_I(dst));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -3435,7 +3428,7 @@ static int clone_copy_inline_extent(struct inode *src,
goto copy_inline_extent;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == btrfs_ino(dst) &&
+ if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
key.type == BTRFS_EXTENT_DATA_KEY) {
ASSERT(key.offset > 0);
return -EOPNOTSUPP;
@@ -3469,7 +3462,7 @@ static int clone_copy_inline_extent(struct inode *src,
} else if (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
- if (key.objectid == btrfs_ino(dst) &&
+ if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
key.type == BTRFS_EXTENT_DATA_KEY)
return -EOPNOTSUPP;
}
@@ -3563,7 +3556,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
path->reada = READA_FORWARD;
/* clone data */
- key.objectid = btrfs_ino(src);
+ key.objectid = btrfs_ino(BTRFS_I(src));
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = off;
@@ -3606,7 +3599,7 @@ process_slot:
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.type > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != btrfs_ino(src))
+ key.objectid != btrfs_ino(BTRFS_I(src)))
break;
if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -3659,7 +3652,7 @@ process_slot:
path->leave_spinning = 0;
memcpy(&new_key, &key, sizeof(new_key));
- new_key.objectid = btrfs_ino(inode);
+ new_key.objectid = btrfs_ino(BTRFS_I(inode));
if (off <= key.offset)
new_key.offset = key.offset + destoff - off;
else
@@ -3749,7 +3742,7 @@ process_slot:
fs_info,
disko, diskl, 0,
root->root_key.objectid,
- btrfs_ino(inode),
+ btrfs_ino(BTRFS_I(inode)),
new_key.offset - datao);
if (ret) {
btrfs_abort_transaction(trans,
@@ -3779,7 +3772,7 @@ process_slot:
size -= skip + trim;
datal -= skip + trim;
- ret = clone_copy_inline_extent(src, inode,
+ ret = clone_copy_inline_extent(inode,
trans, path,
&new_key,
drop_start,
@@ -5129,7 +5122,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
down_write(&fs_info->subvol_sem);
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 041c3326d109..bc2aba810629 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -432,7 +432,7 @@ out:
}
/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
struct list_head *logged_list,
const loff_t start,
const loff_t end)
@@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode,
struct rb_node *n;
struct rb_node *prev;
- tree = &BTRFS_I(inode)->ordered_tree;
+ tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
n = __tree_search(&tree->tree, end, &prev);
if (!n)
@@ -984,8 +984,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
}
disk_i_size = BTRFS_I(inode)->disk_i_size;
- /* truncate file */
- if (disk_i_size > i_size) {
+ /*
+ * truncate file.
+ * If ordered is not NULL, then this is called from endio and
+ * disk_i_size will be updated by either truncate itself or any
+ * in-flight IOs which are inside the disk_i_size.
+ *
+ * Because btrfs_setsize() may set i_size with disk_i_size if truncate
+ * fails somehow, we need to make sure we have a precise disk_i_size by
+ * updating it as usual.
+ *
+ */
+ if (!ordered && disk_i_size > i_size) {
BTRFS_I(inode)->disk_i_size = orig_offset;
ret = 0;
goto out;
@@ -1032,25 +1042,22 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
/* We treat this entry as if it doesn't exist */
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
- if (test->file_offset + test->len <= disk_i_size)
+
+ if (entry_end(test) <= disk_i_size)
break;
if (test->file_offset >= i_size)
break;
- if (entry_end(test) > disk_i_size) {
- /*
- * we don't update disk_i_size now, so record this
- * undealt i_size. Or we will not know the real
- * i_size.
- */
- if (test->outstanding_isize < offset)
- test->outstanding_isize = offset;
- if (ordered &&
- ordered->outstanding_isize >
- test->outstanding_isize)
- test->outstanding_isize =
- ordered->outstanding_isize;
- goto out;
- }
+
+ /*
+ * We don't update disk_i_size now, so record this undealt
+ * i_size. Or we will not know the real i_size.
+ */
+ if (test->outstanding_isize < offset)
+ test->outstanding_isize = offset;
+ if (ordered &&
+ ordered->outstanding_isize > test->outstanding_isize)
+ test->outstanding_isize = ordered->outstanding_isize;
+ goto out;
}
new_i_size = min_t(u64, offset, i_size);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5f2b0ca28705..a8cb8efe6fae 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -75,6 +75,8 @@ struct btrfs_ordered_sum {
* in the logging code. */
#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
* complete in the current transaction. */
+#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@@ -201,7 +203,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len);
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct inode *inode,
+void btrfs_get_logged_extents(struct btrfs_inode *inode,
struct list_head *logged_list,
const loff_t start,
const loff_t end);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f2621e330954..d6cb155ef7a1 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -279,7 +279,7 @@ static void inode_prop_iterator(void *ctx,
if (unlikely(ret))
btrfs_warn(root->fs_info,
"error applying prop %s to ino %llu (root %llu): %d",
- handler->xattr_name, btrfs_ino(inode),
+ handler->xattr_name, btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
else
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
@@ -288,7 +288,7 @@ static void inode_prop_iterator(void *ctx,
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
int ret;
ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 662821f1252c..a5da750c1087 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -319,7 +319,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
- fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+ fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
goto out;
@@ -876,7 +876,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
goto out;
}
- fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
+ fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
goto out;
@@ -1019,7 +1019,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
- clean_tree_block(trans, fs_info, quota_root->node);
+ clean_tree_block(fs_info, quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
@@ -1038,6 +1038,15 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
+static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup,
+ u64 num_bytes)
+{
+ btrfs_warn(fs_info,
+ "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
+ qgroup->qgroupid, qgroup->reserved, num_bytes);
+ qgroup->reserved = 0;
+}
/*
* The easy accounting, if we are adding/removing the only ref for an extent
* then this qgroup and all of the parent qgroups get their reference and
@@ -1065,8 +1074,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
- if (sign > 0)
- qgroup->reserved -= num_bytes;
+ if (sign > 0) {
+ if (WARN_ON(qgroup->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qgroup, num_bytes);
+ else
+ qgroup->reserved -= num_bytes;
+ }
qgroup_dirty(fs_info, qgroup);
@@ -1086,8 +1099,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
- if (sign > 0)
- qgroup->reserved -= num_bytes;
+ if (sign > 0) {
+ if (WARN_ON(qgroup->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qgroup,
+ num_bytes);
+ else
+ qgroup->reserved -= num_bytes;
+ }
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
@@ -1156,7 +1174,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
- tmp = ulist_alloc(GFP_NOFS);
+ tmp = ulist_alloc(GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1205,7 +1223,7 @@ out:
return ret;
}
-int __del_qgroup_relation(struct btrfs_trans_handle *trans,
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
@@ -1216,7 +1234,7 @@ int __del_qgroup_relation(struct btrfs_trans_handle *trans,
int ret = 0;
int err;
- tmp = ulist_alloc(GFP_NOFS);
+ tmp = ulist_alloc(GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1446,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
while (node) {
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
- ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
- &record->old_roots);
+ if (WARN_ON(!record->old_roots))
+ ret = btrfs_find_all_roots(NULL, fs_info,
+ record->bytenr, 0, &record->old_roots);
if (ret < 0)
break;
if (qgroup_to_skip)
@@ -1486,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
return 0;
}
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *qrecord)
+{
+ struct ulist *old_root;
+ u64 bytenr = qrecord->bytenr;
+ int ret;
+
+ ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Here we don't need to get the lock of
+ * trans->transaction->delayed_refs, since inserted qrecord won't
+ * be deleted, only qrecord->node may be modified (new qrecord insert)
+ *
+ * So modifying qrecord->old_roots is safe here
+ */
+ qrecord->old_roots = old_root;
+ return 0;
+}
+
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
gfp_t gfp_flag)
@@ -1511,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
- if (ret > 0)
+ if (ret > 0) {
kfree(record);
- return 0;
+ return 0;
+ }
+ return btrfs_qgroup_trace_extent_post(fs_info, record);
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -1571,8 +1614,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
* If we increment the root nodes slot counter past the number of
* elements, 1 is returned to signal completion of the search.
*/
-static int adjust_slots_upwards(struct btrfs_root *root,
- struct btrfs_path *path, int root_level)
+static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
{
int level = 0;
int nr, slot;
@@ -1713,7 +1755,7 @@ walk_down:
goto out;
/* Nonzero return here means we completed our search */
- ret = adjust_slots_upwards(root, path, root_level);
+ ret = adjust_slots_upwards(path, root_level);
if (ret)
break;
@@ -1927,13 +1969,14 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
u64 nr_old_roots = 0;
int ret = 0;
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+ return 0;
+
if (new_roots)
nr_new_roots = new_roots->nnodes;
if (old_roots)
nr_old_roots = old_roots->nnodes;
- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
- goto out_free;
BUG_ON(!fs_info->quota_root);
trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
@@ -2170,9 +2213,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
goto out;
}
- rcu_read_lock();
level_size = fs_info->nodesize;
- rcu_read_unlock();
}
/*
@@ -2306,7 +2347,20 @@ out:
return ret;
}
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
+static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
+{
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
+ qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+ return false;
+
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
+ qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+ return false;
+
+ return true;
+}
+
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -2347,16 +2401,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = unode_aux_to_qgroup(unode);
- if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + (s64)qg->rfer + num_bytes >
- qg->max_rfer) {
- ret = -EDQUOT;
- goto out;
- }
-
- if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + (s64)qg->excl + num_bytes >
- qg->max_excl) {
+ if (enforce && !qgroup_check_limits(qg, num_bytes)) {
ret = -EDQUOT;
goto out;
}
@@ -2424,7 +2469,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = unode_aux_to_qgroup(unode);
- qg->reserved -= num_bytes;
+ if (WARN_ON(qg->reserved < num_bytes))
+ report_reserved_underflow(fs_info, qg, num_bytes);
+ else
+ qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
@@ -2439,11 +2487,6 @@ out:
spin_unlock(&fs_info->qgroup_lock);
}
-static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
-{
- return btrfs_qgroup_free_refroot(root->fs_info, root->objectid,
- num_bytes);
-}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
@@ -2803,7 +2846,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
return 0;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
+ ulist_init(&changeset.range_changed);
ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
trace_btrfs_qgroup_reserve_data(inode, start, len,
@@ -2811,21 +2854,21 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
- ret = qgroup_reserve(root, changeset.bytes_changed);
+ ret = qgroup_reserve(root, changeset.bytes_changed, true);
if (ret < 0)
goto cleanup;
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
cleanup:
/* cleanup already reserved ranges */
ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(changeset.range_changed, &uiter)))
+ while ((unode = ulist_next(&changeset.range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
GFP_NOFS);
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
}
@@ -2837,23 +2880,22 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
int ret;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
- if (!changeset.range_changed)
- return -ENOMEM;
-
+ ulist_init(&changeset.range_changed);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
goto out;
if (free) {
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+ BTRFS_I(inode)->root->objectid,
+ changeset.bytes_changed);
trace_op = QGROUP_FREE;
}
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
out:
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
return ret;
}
@@ -2892,7 +2934,8 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
return __btrfs_qgroup_release_data(inode, start, len, 0);
}
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -2902,7 +2945,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- ret = qgroup_reserve(root, num_bytes);
+ ret = qgroup_reserve(root, num_bytes, enforce);
if (ret < 0)
return ret;
atomic_add(num_bytes, &root->qgroup_meta_rsv);
@@ -2921,7 +2964,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
reserved = atomic_xchg(&root->qgroup_meta_rsv, 0);
if (reserved == 0)
return;
- qgroup_free(root, reserved);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
@@ -2935,7 +2978,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
atomic_sub(num_bytes, &root->qgroup_meta_rsv);
- qgroup_free(root, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}
/*
@@ -2950,22 +2993,22 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
int ret;
changeset.bytes_changed = 0;
- changeset.range_changed = ulist_alloc(GFP_NOFS);
- if (WARN_ON(!changeset.range_changed))
- return;
-
+ ulist_init(&changeset.range_changed);
ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_QGROUP_RESERVED, &changeset);
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
ULIST_ITER_INIT(&iter);
- while ((unode = ulist_next(changeset.range_changed, &iter))) {
+ while ((unode = ulist_next(&changeset.range_changed, &iter))) {
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
inode->i_ino, unode->val, unode->aux);
}
- qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+ btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
+ BTRFS_I(inode)->root->objectid,
+ changeset.bytes_changed);
+
}
- ulist_free(changeset.range_changed);
+ ulist_release(&changeset.range_changed);
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 416ae8e1d23c..26932a8a1993 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at commit trans time.
+ * So qgroup can account it at transaction committing time.
*
- * No lock version, caller must acquire delayed ref lock and allocate memory.
+ * No lock version, caller must acquire delayed ref lock and allocated memory,
+ * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
*
* Return 0 for success insert
* Return >0 for existing record, caller can free @record safely.
@@ -108,11 +109,37 @@ int btrfs_qgroup_trace_extent_nolock(
struct btrfs_qgroup_extent_record *record);
/*
+ * Post handler after qgroup_trace_extent_nolock().
+ *
+ * NOTE: Current qgroup does the expensive backref walk at transaction
+ * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
+ * new transaction.
+ * This is designed to allow btrfs_find_all_roots() to get correct new_roots
+ * result.
+ *
+ * However for old_roots there is no need to do backref walk at that time,
+ * since we search commit roots to walk backref and result will always be
+ * correct.
+ *
+ * Due to the nature of no lock version, we can't do backref there.
+ * So we must call btrfs_qgroup_trace_extent_post() after exiting
+ * spinlock context.
+ *
+ * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
+ * using current root, then we can move all expensive backref walk out of
+ * transaction committing, but not now as qgroup accounting will be wrong again.
+ */
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_extent_record *qrecord);
+
+/*
* Inform qgroup to trace one dirty extent, specified by @bytenr and
* @num_bytes.
* So qgroup can account it at commit trans time.
*
- * Better encapsulated version.
+ * Better encapsulated version, with memory allocation and backref walk for
+ * commit roots.
+ * So this can sleep.
*
* Return 0 if the operation is done.
* Return <0 for error, like memory allocation failure or invalid parameter
@@ -181,7 +208,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ bool enforce);
void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d2a9a1ee5361..1571bf26dc07 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -677,11 +677,9 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
- int walk = 0;
spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) {
- walk++;
if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
spin_lock(&cur->bio_list_lock);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 379711048fb0..ddbde0f08365 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1548,9 +1548,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(&entry->vfs_inode))
+ if (objectid < btrfs_ino(entry))
node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
+ else if (objectid > btrfs_ino(entry))
node = node->rb_right;
else
break;
@@ -1558,7 +1558,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
+ if (objectid <= btrfs_ino(entry)) {
node = prev;
break;
}
@@ -1573,7 +1573,7 @@ again:
return inode;
}
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
+ objectid = btrfs_ino(entry) + 1;
if (cond_resched_lock(&root->inode_lock))
goto again;
@@ -1609,8 +1609,8 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
return -ENOMEM;
bytenr -= BTRFS_I(reloc_inode)->index_cnt;
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
- bytenr, 0);
+ ret = btrfs_lookup_file_extent(NULL, root, path,
+ btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
if (ret < 0)
goto out;
if (ret > 0) {
@@ -1698,11 +1698,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
if (first) {
inode = find_next_inode(root, key.objectid);
first = 0;
- } else if (inode && btrfs_ino(inode) < key.objectid) {
+ } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
btrfs_add_delayed_iput(inode);
inode = find_next_inode(root, key.objectid);
}
- if (inode && btrfs_ino(inode) == key.objectid) {
+ if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
end = key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
WARN_ON(!IS_ALIGNED(key.offset,
@@ -2088,7 +2088,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
inode = find_next_inode(root, objectid);
if (!inode)
break;
- ino = btrfs_ino(inode);
+ ino = btrfs_ino(BTRFS_I(inode));
if (ino > max_key->objectid) {
iput(inode);
@@ -3543,7 +3543,7 @@ truncate:
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
+ ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
@@ -4334,7 +4334,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
- ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
+ ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
if (ret) {
err = ret;
goto out;
@@ -4347,8 +4347,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
goto out;
}
- inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
- path);
+ inode = lookup_free_space_inode(fs_info, rc->block_group, path);
btrfs_free_path(path);
if (!IS_ERR(inode))
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 4c6735491ee0..a08224eab8b4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -74,7 +74,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
*
* If we find something return 0, otherwise > 0, < 0 on error.
*/
-int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
+int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key)
{
@@ -207,7 +207,7 @@ out:
}
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_root_item *item)
+ const struct btrfs_key *key, struct btrfs_root_item *item)
{
/*
* Make sure generation v1 and v2 match. See update_root for details.
@@ -337,7 +337,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
/* drop the root item for 'key' from 'root' */
int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key)
+ const struct btrfs_key *key)
{
struct btrfs_path *path;
int ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9a94670536a6..ff9a11c39f5e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -282,9 +282,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num);
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
- struct scrub_wr_ctx *wr_ctx,
- struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
@@ -501,7 +499,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
spin_lock_init(&sctx->stat_lock);
init_waitqueue_head(&sctx->list_wait);
- ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
+ ret = scrub_setup_wr_ctx(&sctx->wr_ctx,
fs_info->dev_replace.tgtdev, is_dev_replace);
if (ret) {
scrub_free_ctx(sctx);
@@ -3584,7 +3582,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(root, cache);
+ ret = btrfs_inc_block_group_ro(fs_info, cache);
if (!ret && is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks
@@ -4084,9 +4082,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
btrfs_put_bbio(bbio);
}
-static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
- struct scrub_wr_ctx *wr_ctx,
- struct btrfs_fs_info *fs_info,
+static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace)
{
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b5ae7d3d1896..da687dc79cce 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -265,7 +265,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
function, line, errstr);
return;
}
- ACCESS_ONCE(trans->transaction->aborted) = errno;
+ WRITE_ONCE(trans->transaction->aborted, errno);
/* Wake up anybody who may be waiting on this transaction */
wake_up(&fs_info->transaction_wait);
wake_up(&fs_info->transaction_blocked_wait);
@@ -1114,7 +1114,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
static int btrfs_fill_super(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
- void *data, int silent)
+ void *data)
{
struct inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@@ -1611,8 +1611,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
btrfs_sb(s)->bdev_holder = fs_type;
- error = btrfs_fill_super(s, fs_devices, data,
- flags & MS_SILENT ? 1 : 0);
+ error = btrfs_fill_super(s, fs_devices, data);
}
if (error) {
deactivate_locked_super(s);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0e0508f488b2..6b3e0fc2fe7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -474,7 +474,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, unsigned int num_items,
- unsigned int type, enum btrfs_reserve_flush_enum flush)
+ unsigned int type, enum btrfs_reserve_flush_enum flush,
+ bool enforce_qgroups)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -505,9 +506,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* Do the reservation before we join the transaction so we can do all
* the appropriate flushing if need be.
*/
- if (num_items > 0 && root != fs_info->chunk_root) {
+ if (num_items && root != fs_info->chunk_root) {
qgroup_reserved = num_items * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved);
+ ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
+ enforce_qgroups);
if (ret)
return ERR_PTR(ret);
@@ -613,8 +615,9 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_ALL);
+ BTRFS_RESERVE_FLUSH_ALL, true);
}
+
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
@@ -625,7 +628,14 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
u64 num_bytes;
int ret;
- trans = btrfs_start_transaction(root, num_items);
+ /*
+ * We have two callers: unlink and block group removal. The
+ * former should succeed even if we will temporarily exceed
+ * quota and the latter operates on the extent root so
+ * qgroup enforcement is ignored anyway.
+ */
+ trans = start_transaction(root, num_items, TRANS_START,
+ BTRFS_RESERVE_FLUSH_ALL, false);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
@@ -654,25 +664,25 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
- BTRFS_RESERVE_FLUSH_LIMIT);
+ BTRFS_RESERVE_FLUSH_LIMIT, true);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
- return start_transaction(root, 0, TRANS_JOIN,
- BTRFS_RESERVE_NO_FLUSH);
+ return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
+ true);
}
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_USERSPACE,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
/*
@@ -691,7 +701,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_ATTACH,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
}
/*
@@ -707,7 +717,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
trans = start_transaction(root, 0, TRANS_ATTACH,
- BTRFS_RESERVE_NO_FLUSH);
+ BTRFS_RESERVE_NO_FLUSH, true);
if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
btrfs_wait_for_commit(root->fs_info, 0);
@@ -866,14 +876,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (lock && !atomic_read(&info->open_ioctl_trans) &&
should_end_transaction(trans) &&
- ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
+ READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
spin_lock(&info->trans_lock);
if (cur_trans->state == TRANS_STATE_RUNNING)
cur_trans->state = TRANS_STATE_BLOCKED;
spin_unlock(&info->trans_lock);
}
- if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
+ if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
if (throttle)
return btrfs_commit_transaction(trans);
else
@@ -1354,12 +1364,8 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
* enabled. If this check races with the ioctl, rescan will
* kick in anyway.
*/
- mutex_lock(&fs_info->qgroup_ioctl_lock);
- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
- }
- mutex_unlock(&fs_info->qgroup_ioctl_lock);
/*
* We are going to commit transaction, see btrfs_commit_transaction()
@@ -1504,7 +1510,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
- btrfs_ino(parent_inode),
+ btrfs_ino(BTRFS_I(parent_inode)),
dentry->d_name.name,
dentry->d_name.len, 0);
if (dir_item != NULL && !IS_ERR(dir_item)) {
@@ -1598,7 +1604,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
*/
ret = btrfs_add_root_ref(trans, fs_info, objectid,
parent_root->root_key.objectid,
- btrfs_ino(parent_inode), index,
+ btrfs_ino(BTRFS_I(parent_inode)), index,
dentry->d_name.name, dentry->d_name.len);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1940,7 +1946,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
int ret;
/* Stop the commit early if ->aborted is set */
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
btrfs_end_transaction(trans);
return ret;
@@ -2080,7 +2086,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
atomic_read(&cur_trans->num_writers) == 1);
/* ->aborted might be set after the previous check, so check it */
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto scrub_continue;
}
@@ -2194,14 +2200,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* The tasks which save the space cache and inode cache may also
* update ->aborted, check it.
*/
- if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
- btrfs_prepare_extent_commit(trans, fs_info);
+ btrfs_prepare_extent_commit(fs_info);
cur_trans = fs_info->running_transaction;
@@ -2251,7 +2257,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
- ret = write_ctree_super(trans, fs_info, 0);
+ ret = write_all_supers(fs_info, 0);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
goto scrub_continue;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index eeffff84f280..3806853cde08 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -97,7 +97,7 @@
#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
const loff_t start,
const loff_t end,
@@ -631,8 +631,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* file. This must be done before the btrfs_drop_extents run
* so we don't try to drop this extent.
*/
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
- start, 0);
+ ret = btrfs_lookup_file_extent(trans, root, path,
+ btrfs_ino(BTRFS_I(inode)), start, 0);
if (ret == 0 &&
(found_type == BTRFS_FILE_EXTENT_REG ||
@@ -843,7 +843,7 @@ out:
static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- struct inode *dir,
+ struct btrfs_inode *dir,
struct btrfs_dir_item *di)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -875,7 +875,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+ ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name,
+ name_len);
if (ret)
goto out;
else
@@ -991,8 +992,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_root *log_root,
- struct inode *dir, struct inode *inode,
- struct extent_buffer *eb,
+ struct btrfs_inode *dir,
+ struct btrfs_inode *inode,
u64 inode_objectid, u64 parent_objectid,
u64 ref_index, char *name, int namelen,
int *search_done)
@@ -1047,12 +1048,11 @@ again:
parent_objectid,
victim_name,
victim_name_len)) {
- inc_nlink(inode);
+ inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
- ret = btrfs_unlink_inode(trans, root, dir,
- inode, victim_name,
- victim_name_len);
+ ret = btrfs_unlink_inode(trans, root, dir, inode,
+ victim_name, victim_name_len);
kfree(victim_name);
if (ret)
return ret;
@@ -1115,16 +1115,16 @@ again:
victim_name_len)) {
ret = -ENOENT;
victim_parent = read_one_inode(root,
- parent_objectid);
+ parent_objectid);
if (victim_parent) {
- inc_nlink(inode);
+ inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root,
- victim_parent,
- inode,
- victim_name,
- victim_name_len);
+ BTRFS_I(victim_parent),
+ inode,
+ victim_name,
+ victim_name_len);
if (!ret)
ret = btrfs_run_delayed_items(
trans,
@@ -1295,8 +1295,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
/* if we already have a perfect match, we're done */
- if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
- ref_index, name, namelen)) {
+ if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+ btrfs_ino(BTRFS_I(inode)), ref_index,
+ name, namelen)) {
/*
* look for a conflicting back reference in the
* metadata. if we find one we have to unlink that name
@@ -1307,7 +1308,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (!search_done) {
ret = __add_inode_ref(trans, root, path, log,
- dir, inode, eb,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
inode_objectid,
parent_objectid,
ref_index, name, namelen,
@@ -1360,7 +1362,7 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
}
static int count_inode_extrefs(struct btrfs_root *root,
- struct inode *inode, struct btrfs_path *path)
+ struct btrfs_inode *inode, struct btrfs_path *path)
{
int ret = 0;
int name_len;
@@ -1404,7 +1406,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
}
static int count_inode_refs(struct btrfs_root *root,
- struct inode *inode, struct btrfs_path *path)
+ struct btrfs_inode *inode, struct btrfs_path *path)
{
int ret;
struct btrfs_key key;
@@ -1477,19 +1479,19 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
int ret;
u64 nlink = 0;
- u64 ino = btrfs_ino(inode);
+ u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- ret = count_inode_refs(root, inode, path);
+ ret = count_inode_refs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
nlink = ret;
- ret = count_inode_extrefs(root, inode, path);
+ ret = count_inode_extrefs(root, BTRFS_I(inode), path);
if (ret < 0)
goto out;
@@ -1769,7 +1771,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
if (!exists)
goto out;
- ret = drop_one_dir_item(trans, root, path, dir, dst_di);
+ ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di);
if (ret)
goto out;
@@ -2052,8 +2054,8 @@ again:
}
inc_nlink(inode);
- ret = btrfs_unlink_inode(trans, root, dir, inode,
- name, name_len);
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ BTRFS_I(inode), name, name_len);
if (!ret)
ret = btrfs_run_delayed_items(trans, fs_info);
kfree(name);
@@ -2469,7 +2471,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2549,7 +2551,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2627,7 +2629,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, fs_info, next);
+ clean_tree_block(fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2958,7 +2960,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- ret = write_ctree_super(trans, fs_info, 1);
+ ret = write_all_supers(fs_info, 1);
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
btrfs_abort_transaction(trans, ret);
@@ -3084,7 +3086,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *dir, u64 index)
+ struct btrfs_inode *dir, u64 index)
{
struct btrfs_root *log;
struct btrfs_dir_item *di;
@@ -3094,14 +3096,14 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int bytes_del = 0;
u64 dir_ino = btrfs_ino(dir);
- if (BTRFS_I(dir)->logged_trans < trans->transid)
+ if (dir->logged_trans < trans->transid)
return 0;
ret = join_running_log_trans(root);
if (ret)
return 0;
- mutex_lock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
log = root->log_root;
path = btrfs_alloc_path();
@@ -3176,7 +3178,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
fail:
btrfs_free_path(path);
out_unlock:
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_unlock(&dir->log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
@@ -3192,25 +3194,25 @@ out_unlock:
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *inode, u64 dirid)
+ struct btrfs_inode *inode, u64 dirid)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log;
u64 index;
int ret;
- if (BTRFS_I(inode)->logged_trans < trans->transid)
+ if (inode->logged_trans < trans->transid)
return 0;
ret = join_running_log_trans(root);
if (ret)
return 0;
log = root->log_root;
- mutex_lock(&BTRFS_I(inode)->log_mutex);
+ mutex_lock(&inode->log_mutex);
ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
dirid, &index);
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
if (ret == -ENOSPC) {
btrfs_set_log_full_commit(fs_info, trans);
ret = 0;
@@ -3260,7 +3262,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
* to replay anything deleted before the fsync
*/
static noinline int log_dir_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path, int key_type,
struct btrfs_log_ctx *ctx,
@@ -3450,7 +3452,7 @@ done:
* key logged by this transaction.
*/
static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path,
struct btrfs_log_ctx *ctx)
@@ -3464,9 +3466,8 @@ again:
min_key = 0;
max_key = 0;
while (1) {
- ret = log_dir_items(trans, root, inode, path,
- dst_path, key_type, ctx, min_key,
- &max_key);
+ ret = log_dir_items(trans, root, inode, path, dst_path, key_type,
+ ctx, min_key, &max_key);
if (ret)
return ret;
if (max_key == (u64)-1)
@@ -3595,34 +3596,34 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *log, struct btrfs_path *path,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
int ret;
ret = btrfs_insert_empty_item(trans, log, path,
- &BTRFS_I(inode)->location,
- sizeof(*inode_item));
+ &inode->location, sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
- fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
+ fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode,
+ 0, 0);
btrfs_release_path(path);
return 0;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *dst_path,
struct btrfs_path *src_path, u64 *last_extent,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
unsigned long src_offset;
unsigned long dst_offset;
- struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
+ struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
struct btrfs_inode_item *inode_item;
struct extent_buffer *src = src_path->nodes[0];
@@ -3633,7 +3634,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
char *ins_data;
int i;
struct list_head ordered_sums;
- int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
bool has_extents = false;
bool need_find_last_extent = true;
bool done = false;
@@ -3675,7 +3676,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
dst_path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, dst_path->nodes[0], inode_item,
- inode, inode_only == LOG_INODE_EXISTS,
+ &inode->vfs_inode,
+ inode_only == LOG_INODE_EXISTS,
logged_isize);
} else {
copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
@@ -3783,7 +3785,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
if (need_find_last_extent) {
u64 len;
- ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+ ret = btrfs_prev_leaf(inode->root, src_path);
if (ret < 0)
return ret;
if (ret)
@@ -3825,8 +3827,8 @@ fill_holes:
if (need_find_last_extent) {
/* btrfs_prev_leaf could return 1 without releasing the path */
btrfs_release_path(src_path);
- ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
- src_path, 0, 0);
+ ret = btrfs_search_slot(NULL, inode->root, &first_key,
+ src_path, 0, 0);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3846,7 +3848,7 @@ fill_holes:
u64 extent_end;
if (i >= btrfs_header_nritems(src_path->nodes[0])) {
- ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+ ret = btrfs_next_leaf(inode->root, src_path);
if (ret < 0)
return ret;
ASSERT(ret == 0);
@@ -3881,8 +3883,7 @@ fill_holes:
offset = *last_extent;
len = key.offset - *last_extent;
ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
- offset, 0, 0, len, 0, len, 0,
- 0, 0);
+ offset, 0, 0, len, 0, len, 0, 0, 0);
if (ret)
break;
*last_extent = extent_end;
@@ -4055,7 +4056,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
}
static int log_one_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, struct btrfs_root *root,
+ struct btrfs_inode *inode, struct btrfs_root *root,
const struct extent_map *em,
struct btrfs_path *path,
const struct list_head *logged_list,
@@ -4072,8 +4073,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
int extent_inserted = 0;
bool ordered_io_err = false;
- ret = wait_ordered_extents(trans, inode, root, em, logged_list,
- &ordered_io_err);
+ ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
+ logged_list, &ordered_io_err);
if (ret)
return ret;
@@ -4084,7 +4085,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_init_map_token(&token);
- ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+ ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
em->start + em->len, NULL, 0, 1,
sizeof(*fi), &extent_inserted);
if (ret)
@@ -4150,7 +4151,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct list_head *logged_list,
struct btrfs_log_ctx *ctx,
@@ -4159,14 +4160,14 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
{
struct extent_map *em, *n;
struct list_head extents;
- struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *tree = &inode->extent_tree;
u64 test_gen;
int ret = 0;
int num = 0;
INIT_LIST_HEAD(&extents);
- down_write(&BTRFS_I(inode)->dio_sem);
+ down_write(&inode->dio_sem);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
@@ -4206,7 +4207,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
* without writing to the log tree and the fsync must report the
* file data write error and not commit the current transaction.
*/
- ret = filemap_check_errors(inode->i_mapping);
+ ret = filemap_check_errors(inode->vfs_inode.i_mapping);
if (ret)
ctx->io_err = ret;
process:
@@ -4235,13 +4236,13 @@ process:
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
- up_write(&BTRFS_I(inode)->dio_sem);
+ up_write(&inode->dio_sem);
btrfs_release_path(path);
return ret;
}
-static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
struct btrfs_path *path, u64 *size_ret)
{
struct btrfs_key key;
@@ -4279,7 +4280,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
*/
static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path)
{
@@ -4374,7 +4375,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
*/
static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4385,7 +4386,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_root *log = root->log_root;
const u64 ino = btrfs_ino(inode);
- const u64 i_size = i_size_read(inode);
+ const u64 i_size = i_size_read(&inode->vfs_inode);
if (!btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
@@ -4495,7 +4496,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
- struct inode *inode,
+ struct btrfs_inode *inode,
u64 *other_ino)
{
int ret;
@@ -4551,9 +4552,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
}
read_extent_buffer(eb, name, name_ptr, this_name_len);
- di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
- search_path, parent,
- name, this_name_len, 0);
+ di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
+ parent, name, this_name_len, 0);
if (di && !IS_ERR(di)) {
struct btrfs_key di_key;
@@ -4596,7 +4596,7 @@ out:
* This handles both files and directories.
*/
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root, struct btrfs_inode *inode,
int inode_only,
const loff_t start,
const loff_t end,
@@ -4618,7 +4618,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
int ins_nr;
bool fast_search = false;
u64 ino = btrfs_ino(inode);
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
@@ -4639,9 +4639,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
/* today the code can only do partial logging of directories */
- if (S_ISDIR(inode->i_mode) ||
+ if (S_ISDIR(inode->vfs_inode.i_mode) ||
(!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags) &&
+ &inode->runtime_flags) &&
inode_only >= LOG_INODE_EXISTS))
max_key.type = BTRFS_XATTR_ITEM_KEY;
else
@@ -4654,8 +4654,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* order for the log replay code to mark inodes for link count
* fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
*/
- if (S_ISDIR(inode->i_mode) ||
- BTRFS_I(inode)->generation > fs_info->last_trans_committed)
+ if (S_ISDIR(inode->vfs_inode.i_mode) ||
+ inode->generation > fs_info->last_trans_committed)
ret = btrfs_commit_inode_delayed_items(trans, inode);
else
ret = btrfs_commit_inode_delayed_inode(inode);
@@ -4668,17 +4668,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (inode_only == LOG_OTHER_INODE) {
inode_only = LOG_INODE_EXISTS;
- mutex_lock_nested(&BTRFS_I(inode)->log_mutex,
- SINGLE_DEPTH_NESTING);
+ mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING);
} else {
- mutex_lock(&BTRFS_I(inode)->log_mutex);
+ mutex_lock(&inode->log_mutex);
}
/*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
*/
- if (S_ISDIR(inode->i_mode)) {
+ if (S_ISDIR(inode->vfs_inode.i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
if (inode_only == LOG_INODE_EXISTS)
@@ -4699,31 +4698,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
- err = logged_inode_size(log, inode, path,
- &logged_isize);
+ err = logged_inode_size(log, inode, path, &logged_isize);
if (err)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
+ &inode->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags);
+ &inode->runtime_flags);
while(1) {
ret = btrfs_truncate_inode_items(trans,
- log, inode, 0, 0);
+ log, &inode->vfs_inode, 0, 0);
if (ret != -EAGAIN)
break;
}
}
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags) ||
+ &inode->runtime_flags) ||
inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
@@ -4764,18 +4762,17 @@ again:
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
- BTRFS_I(inode)->generation == trans->transid) {
+ inode->generation == trans->transid) {
u64 other_ino = 0;
ret = btrfs_check_ref_name_override(path->nodes[0],
- path->slots[0],
- &min_key, inode,
- &other_ino);
+ path->slots[0], &min_key, inode,
+ &other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
} else if (ret > 0 && ctx &&
- other_ino != btrfs_ino(ctx->inode)) {
+ other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
struct btrfs_key inode_key;
struct inode *other_inode;
@@ -4823,9 +4820,10 @@ again:
* update the log with the new name before we
* unpin it.
*/
- err = btrfs_log_inode(trans, root, other_inode,
- LOG_OTHER_INODE,
- 0, LLONG_MAX, ctx);
+ err = btrfs_log_inode(trans, root,
+ BTRFS_I(other_inode),
+ LOG_OTHER_INODE, 0, LLONG_MAX,
+ ctx);
iput(other_inode);
if (err)
goto out_unlock;
@@ -4979,25 +4977,25 @@ log_extents:
write_unlock(&em_tree->lock);
}
- if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
+ if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
ret = log_directory_changes(trans, root, inode, path, dst_path,
- ctx);
+ ctx);
if (ret) {
err = ret;
goto out_unlock;
}
}
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->logged_trans = trans->transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
- spin_unlock(&BTRFS_I(inode)->lock);
+ spin_lock(&inode->lock);
+ inode->logged_trans = trans->transid;
+ inode->last_log_commit = inode->last_sub_trans;
+ spin_unlock(&inode->lock);
out_unlock:
if (unlikely(err))
btrfs_put_logged_extents(&logged_list);
else
btrfs_submit_logged_extents(&logged_list, log);
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
btrfs_free_path(path);
btrfs_free_path(dst_path);
@@ -5021,13 +5019,13 @@ out_unlock:
* we logged the inode or it might have also done the unlink).
*/
static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
- struct inode *inode)
+ struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
bool ret = false;
- mutex_lock(&BTRFS_I(inode)->log_mutex);
- if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+ mutex_lock(&inode->log_mutex);
+ if (inode->last_unlink_trans > fs_info->last_trans_committed) {
/*
* Make sure any commits to the log are forced to be full
* commits.
@@ -5035,7 +5033,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_set_log_full_commit(fs_info, trans);
ret = true;
}
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_unlock(&inode->log_mutex);
return ret;
}
@@ -5084,7 +5082,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
- if (btrfs_must_commit_transaction(trans, inode)) {
+ if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) {
ret = 1;
break;
}
@@ -5094,7 +5092,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent)) {
inode = d_inode(parent);
- if (btrfs_must_commit_transaction(trans, inode))
+ if (btrfs_must_commit_transaction(trans, BTRFS_I(inode)))
ret = 1;
break;
}
@@ -5159,7 +5157,7 @@ struct btrfs_dir_list {
*/
static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *start_inode,
+ struct btrfs_inode *start_inode,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5237,7 +5235,7 @@ process_leaf:
goto next_dir_inode;
}
- if (btrfs_inode_in_log(di_inode, trans->transid)) {
+ if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) {
iput(di_inode);
break;
}
@@ -5245,10 +5243,10 @@ process_leaf:
ctx->log_new_dentries = false;
if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
log_mode = LOG_INODE_ALL;
- ret = btrfs_log_inode(trans, root, di_inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
log_mode, 0, LLONG_MAX, ctx);
if (!ret &&
- btrfs_must_commit_transaction(trans, di_inode))
+ btrfs_must_commit_transaction(trans, BTRFS_I(di_inode)))
ret = 1;
iput(di_inode);
if (ret)
@@ -5297,7 +5295,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_root *root = BTRFS_I(inode)->root;
- const u64 ino = btrfs_ino(inode);
+ const u64 ino = btrfs_ino(BTRFS_I(inode));
path = btrfs_alloc_path();
if (!path)
@@ -5365,14 +5363,14 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
if (ctx)
ctx->log_new_dentries = false;
- ret = btrfs_log_inode(trans, root, dir_inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
if (!ret &&
- btrfs_must_commit_transaction(trans, dir_inode))
+ btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode)))
ret = 1;
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
- dir_inode, ctx);
+ BTRFS_I(dir_inode), ctx);
iput(dir_inode);
if (ret)
goto out;
@@ -5436,7 +5434,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- if (btrfs_inode_in_log(inode, trans->transid)) {
+ if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
@@ -5445,7 +5443,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx);
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only,
+ start, end, ctx);
if (ret)
goto end_trans;
@@ -5521,7 +5520,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
break;
if (BTRFS_I(inode)->generation > last_committed) {
- ret = btrfs_log_inode(trans, root, inode,
+ ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
if (ret)
@@ -5535,7 +5534,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
old_parent = parent;
}
if (log_dentries)
- ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+ ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx);
else
ret = 0;
end_trans:
@@ -5730,7 +5729,7 @@ error:
* inodes, etc) are done.
*/
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename)
{
/*
@@ -5743,23 +5742,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* into the file. When the file is logged we check it and
* don't log the parents if the file is fully on disk.
*/
- mutex_lock(&BTRFS_I(inode)->log_mutex);
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ mutex_lock(&inode->log_mutex);
+ inode->last_unlink_trans = trans->transid;
+ mutex_unlock(&inode->log_mutex);
/*
* if this directory was already logged any new
* names for this file/dir will get recorded
*/
smp_mb();
- if (BTRFS_I(dir)->logged_trans == trans->transid)
+ if (dir->logged_trans == trans->transid)
return;
/*
* if the inode we're about to unlink was logged,
* the log will be properly updated for any new names
*/
- if (BTRFS_I(inode)->logged_trans == trans->transid)
+ if (inode->logged_trans == trans->transid)
return;
/*
@@ -5776,9 +5775,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
return;
record:
- mutex_lock(&BTRFS_I(dir)->log_mutex);
- BTRFS_I(dir)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
}
/*
@@ -5794,11 +5793,11 @@ record:
* parent root and tree of tree roots trees, etc) are done.
*/
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
- struct inode *dir)
+ struct btrfs_inode *dir)
{
- mutex_lock(&BTRFS_I(dir)->log_mutex);
- BTRFS_I(dir)->last_unlink_trans = trans->transid;
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
}
/*
@@ -5809,30 +5808,28 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full transaction commit is required.
*/
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
+ struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root * root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_root *root = inode->root;
/*
* this will force the logging code to walk the dentry chain
* up for the file
*/
- if (S_ISREG(inode->i_mode))
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
+ if (S_ISREG(inode->vfs_inode.i_mode))
+ inode->last_unlink_trans = trans->transid;
/*
* if this inode hasn't been logged and directory we're renaming it
* from hasn't been logged, we don't need to log it
*/
- if (BTRFS_I(inode)->logged_trans <=
- fs_info->last_trans_committed &&
- (!old_dir || BTRFS_I(old_dir)->logged_trans <=
- fs_info->last_trans_committed))
+ if (inode->logged_trans <= fs_info->last_trans_committed &&
+ (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+ return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0,
LLONG_MAX, 1, NULL);
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index ab858e31ccbc..483027f9a7f4 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -48,13 +48,13 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
- ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid;
+ WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid);
}
static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
- return ACCESS_ONCE(fs_info->last_trans_log_full_commit) ==
+ return READ_ONCE(fs_info->last_trans_log_full_commit) ==
trans->transid;
}
@@ -72,19 +72,19 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *dir, u64 index);
+ struct btrfs_inode *dir, u64 index);
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
- struct inode *inode, u64 dirid);
+ struct btrfs_inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
+ struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename);
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
- struct inode *dir);
+ struct btrfs_inode *dir);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
+ struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent);
#endif
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b1434bb57e36..d8edf164f81c 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -52,13 +52,13 @@ void ulist_init(struct ulist *ulist)
}
/**
- * ulist_fini - free up additionally allocated memory for the ulist
+ * ulist_release - free up additionally allocated memory for the ulist
* @ulist: the ulist from which to free the additional memory
*
* This is useful in cases where the base 'struct ulist' has been statically
* allocated.
*/
-static void ulist_fini(struct ulist *ulist)
+void ulist_release(struct ulist *ulist)
{
struct ulist_node *node;
struct ulist_node *next;
@@ -79,7 +79,7 @@ static void ulist_fini(struct ulist *ulist)
*/
void ulist_reinit(struct ulist *ulist)
{
- ulist_fini(ulist);
+ ulist_release(ulist);
ulist_init(ulist);
}
@@ -105,13 +105,13 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
* ulist_free - free dynamically allocated ulist
* @ulist: ulist to free
*
- * It is not necessary to call ulist_fini before.
+ * It is not necessary to call ulist_release before.
*/
void ulist_free(struct ulist *ulist)
{
if (!ulist)
return;
- ulist_fini(ulist);
+ ulist_release(ulist);
kfree(ulist);
}
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index a01a2c45825f..53c913632733 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -19,9 +19,6 @@
*
*/
struct ulist_iterator {
-#ifdef CONFIG_BTRFS_DEBUG
- int i;
-#endif
struct list_head *cur_list; /* hint to start search */
};
@@ -32,10 +29,6 @@ struct ulist_node {
u64 val; /* value to store */
u64 aux; /* auxiliary value saved along with the val */
-#ifdef CONFIG_BTRFS_DEBUG
- int seqnum; /* sequence number this node is added */
-#endif
-
struct list_head list; /* used to link node */
struct rb_node rb_node; /* used to speed up search */
};
@@ -51,6 +44,7 @@ struct ulist {
};
void ulist_init(struct ulist *ulist);
+void ulist_release(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(gfp_t gfp_mask);
void ulist_free(struct ulist *ulist);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b2e70073a10d..13e55d13045d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -134,8 +134,7 @@ const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
};
static int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_device *device);
+ struct btrfs_fs_info *fs_info);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -2440,7 +2439,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path)
if (seeding_dev) {
mutex_lock(&fs_info->chunk_mutex);
- ret = init_first_rw_device(trans, fs_info, device);
+ ret = init_first_rw_device(trans, fs_info);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -4584,8 +4583,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
/ sizeof(struct btrfs_stripe) + 1)
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 start,
- u64 type)
+ u64 start, u64 type)
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
@@ -5009,12 +5007,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
chunk_offset = find_next_chunk(fs_info);
- return __btrfs_alloc_chunk(trans, fs_info, chunk_offset, type);
+ return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_device *device)
+ struct btrfs_fs_info *fs_info)
{
struct btrfs_root *extent_root = fs_info->extent_root;
u64 chunk_offset;
@@ -5024,14 +5021,13 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
chunk_offset = find_next_chunk(fs_info);
alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
- ret = __btrfs_alloc_chunk(trans, fs_info, chunk_offset, alloc_profile);
+ ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
if (ret)
return ret;
sys_chunk_offset = find_next_chunk(fs_info);
alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
- ret = __btrfs_alloc_chunk(trans, fs_info, sys_chunk_offset,
- alloc_profile);
+ ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
return ret;
}
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 9621c7f2503e..b3cbf80c5acf 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -47,8 +47,8 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
return -ENOMEM;
/* lookup the xattr by name */
- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
- strlen(name), 0);
+ di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)),
+ name, strlen(name), 0);
if (!di) {
ret = -ENODATA;
goto out;
@@ -108,8 +108,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
path->skip_release_on_error = 1;
if (!value) {
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
- name, name_len, -1);
+ di = btrfs_lookup_xattr(trans, root, path,
+ btrfs_ino(BTRFS_I(inode)), name, name_len, -1);
if (!di && (flags & XATTR_REPLACE))
ret = -ENODATA;
else if (IS_ERR(di))
@@ -128,8 +128,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
*/
if (flags & XATTR_REPLACE) {
ASSERT(inode_is_locked(inode));
- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
- name, name_len, 0);
+ di = btrfs_lookup_xattr(NULL, root, path,
+ btrfs_ino(BTRFS_I(inode)), name, name_len, 0);
if (!di)
ret = -ENODATA;
else if (IS_ERR(di))
@@ -140,7 +140,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
di = NULL;
}
- ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+ ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(BTRFS_I(inode)),
name, name_len, value, size);
if (ret == -EOVERFLOW) {
/*
@@ -278,7 +278,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
* NOTE: we set key.offset = 0; because we want to start with the
* first xattr that we find and walk forward
*/
- key.objectid = btrfs_ino(inode);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 0e87401cf335..28484b3ebc98 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2395,7 +2395,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
+ unsigned int blocksize = i_blocksize(inode);
struct page *page;
void *fsdata;
pgoff_t index, curidx;
@@ -2475,8 +2475,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
- unsigned zerofrom;
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int zerofrom;
int err;
err = cont_expand_zero(file, mapping, pos, bytes);
@@ -2838,7 +2838,7 @@ int nobh_truncate_page(struct address_space *mapping,
struct buffer_head map_bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -2916,7 +2916,7 @@ int block_truncate_page(struct address_space *mapping,
struct buffer_head *bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -3028,7 +3028,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
struct inode *inode = mapping->host;
tmp.b_state = 0;
tmp.b_blocknr = 0;
- tmp.b_size = 1 << inode->i_blkbits;
+ tmp.b_size = i_blocksize(inode);
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e4b066cd912a..f297a9e18642 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
+ osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
@@ -751,7 +752,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct pagevec pvec;
int done = 0;
int rc = 0;
- unsigned wsize = 1 << inode->i_blkbits;
+ unsigned int wsize = i_blocksize(inode);
struct ceph_osd_request *req = NULL;
int do_sync = 0;
loff_t snap_size, i_size;
@@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@@ -1017,8 +1018,7 @@ new_request:
&ci->i_layout, vino,
offset, &len, 0, num_ops,
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, false);
if (IS_ERR(req)) {
@@ -1028,8 +1028,7 @@ new_request:
min(num_ops,
CEPH_OSD_SLAB_OPS),
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, true);
BUG_ON(IS_ERR(req));
@@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file,
int r;
struct ceph_snap_context *snapc, *oldest;
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
unlock_page(page);
return -EIO;
@@ -1386,8 +1385,9 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
* vm ops
*/
-static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_filemap_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
@@ -1416,7 +1416,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
current->journal_info = vma->vm_file;
- ret = filemap_fault(vma, vmf);
+ ret = filemap_fault(vmf);
current->journal_info = NULL;
} else
ret = -EAGAIN;
@@ -1477,8 +1477,9 @@ out_restore:
/*
* Reuse write_begin here for simplicity.
*/
-static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ceph_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
@@ -1679,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 0, 1,
- CEPH_OSD_OP_CREATE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1697,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 1, 3,
- CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
if (IS_ERR(req)) {
@@ -1871,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
goto out_unlock;
}
- wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
+ wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 5bc5d37b1217..4e7421caf380 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
inode);
if (fscache_cookie_enabled(ci->fscache)) {
- dout("fscache_file_set_cookie %p %p enabing cache\n",
+ dout("fscache_file_set_cookie %p %p enabling cache\n",
inode, filp);
}
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94fd76d04683..cd966f276a8d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
/*
* Return caps we have registered with the MDS(s) as 'wanted'.
*/
-int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
{
struct ceph_cap *cap;
struct rb_node *p;
@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (!__cap_is_valid(cap))
+ if (check && !__cap_is_valid(cap))
continue;
if (cap == ci->i_auth_cap)
mds_wanted |= cap->mds_wanted;
@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
delayed = 1;
}
ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+ if (want & ~cap->mds_wanted) {
+ /* user space may open/close single file frequently.
+ * This avoids droping mds_wanted immediately after
+ * requesting new mds_wanted.
+ */
+ __cap_set_timeouts(mdsc, ci);
+ }
cap->issued &= retain; /* drop bits we don't want */
if (cap->implemented & ~cap->issued) {
@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
- ceph_sync_write_wait(inode);
-
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
goto out;
@@ -2477,23 +2482,22 @@ again:
if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
int mds_wanted;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
+ if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
*err = -EIO;
ret = 1;
goto out_unlock;
}
- mds_wanted = __ceph_caps_mds_wanted(ci);
- if ((mds_wanted & need) != need) {
+ mds_wanted = __ceph_caps_mds_wanted(ci, false);
+ if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
*err = -ESTALE;
ret = 1;
goto out_unlock;
}
- if ((mds_wanted & file_wanted) ==
- (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
+ if (!(file_wanted & ~mds_wanted))
ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
}
@@ -3404,6 +3408,7 @@ retry:
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
ci->i_auth_cap = tcap;
+
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
spin_lock(&mdsc->cap_dirty_lock);
@@ -3417,9 +3422,18 @@ retry:
} else if (tsession) {
/* add placeholder for the export tagert */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+ tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+ if (!list_empty(&ci->i_cap_flush_list) &&
+ ci->i_auth_cap == tcap) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &tcap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+
__ceph_remove_cap(cap, false);
goto out_unlock;
}
@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
}
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
+ struct inode *dir,
int mds, int drop, int unless)
{
- struct inode *dir = d_inode(dentry->d_parent);
+ struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
+ if (!dir) {
+ parent = dget(dentry->d_parent);
+ dir = d_inode(parent);
+ }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
+ dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 39ff678e567f..f2ae393e2c31 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
seq_puts(s, "\t(unsafe)");
else
seq_puts(s, "\t");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8ab1fdf0bd49..3e9ad501addf 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -371,7 +371,7 @@ more:
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
- req->r_direct_is_hash = true;
+ __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
if (fi->last_name) {
req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
if (!req->r_path2) {
@@ -417,7 +417,7 @@ more:
fi->frag = frag;
fi->last_readdir = req;
- if (req->r_did_prepopulate) {
+ if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
fi->readdir_cache_idx = req->r_readdir_cache_idx;
if (fi->readdir_cache_idx < 0) {
/* preclude from marking dir ordered */
@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
goto out;
}
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = drop_caps_for_unlink(inode);
@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
req->r_old_dentry_dir = old_dir;
- req->r_locked_dir = new_dir;
+ req->r_parent = new_dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
struct inode *dir;
if (flags & LOOKUP_RCU) {
- parent = ACCESS_ONCE(dentry->d_parent);
+ parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
op = ceph_snap(dir) == CEPH_SNAPDIR ?
- CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (!IS_ERR(req)) {
req->r_dentry = dget(dentry);
- req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
+ req->r_num_caps = 2;
+ req->r_parent = dir;
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 180bbef760f2..e8f11fa565c5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_inode = d_inode(child);
ihold(d_inode(child));
req->r_ino2 = ceph_vino(d_inode(parent));
- req->r_locked_dir = d_inode(parent);
+ req->r_parent = d_inode(parent);
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 045d30d26624..26cc95421cca 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
- int mds_wanted = __ceph_caps_mds_wanted(ci);
+ int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL);
dout("open %p fmode %d want %s issued %s using existing\n",
@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.open.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir; /* caller holds dir->i_mutex */
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
goto out;
}
- req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
@@ -794,89 +793,6 @@ out:
kfree(aio_work);
}
-/*
- * Write commit request unsafe callback, called to tell us when a
- * request is unsafe (that is, in flight--has been handed to the
- * messenger to send to its target osd). It is called again when
- * we've received a response message indicating the request is
- * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
- * is completed early (and unsuccessfully) due to a timeout or
- * interrupt.
- *
- * This is used if we requested both an ACK and ONDISK commit reply
- * from the OSD.
- */
-static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
-{
- struct ceph_inode_info *ci = ceph_inode(req->r_inode);
-
- dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
- unsafe ? "un" : "");
- if (unsafe) {
- ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
- spin_lock(&ci->i_unsafe_lock);
- list_add_tail(&req->r_unsafe_item,
- &ci->i_unsafe_writes);
- spin_unlock(&ci->i_unsafe_lock);
-
- complete_all(&req->r_completion);
- } else {
- spin_lock(&ci->i_unsafe_lock);
- list_del_init(&req->r_unsafe_item);
- spin_unlock(&ci->i_unsafe_lock);
- ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
- }
-}
-
-/*
- * Wait on any unsafe replies for the given inode. First wait on the
- * newest request, and make that the upper bound. Then, if there are
- * more requests, keep waiting on the oldest as long as it is still older
- * than the original request.
- */
-void ceph_sync_write_wait(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct list_head *head = &ci->i_unsafe_writes;
- struct ceph_osd_request *req;
- u64 last_tid;
-
- if (!S_ISREG(inode->i_mode))
- return;
-
- spin_lock(&ci->i_unsafe_lock);
- if (list_empty(head))
- goto out;
-
- /* set upper bound as _last_ entry in chain */
-
- req = list_last_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- last_tid = req->r_tid;
-
- do {
- ceph_osdc_get_request(req);
- spin_unlock(&ci->i_unsafe_lock);
-
- dout("sync_write_wait on tid %llu (until %llu)\n",
- req->r_tid, last_tid);
- wait_for_completion(&req->r_done_completion);
- ceph_osdc_put_request(req);
-
- spin_lock(&ci->i_unsafe_lock);
- /*
- * from here on look at first entry in chain, since we
- * only want to wait for anything older than last_tid
- */
- if (list_empty(head))
- break;
- req = list_first_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- } while (req->r_tid < last_tid);
-out:
- spin_unlock(&ci->i_unsafe_lock);
-}
-
static ssize_t
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_snap_context *snapc,
@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret2);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
} else {
flags = CEPH_OSD_FLAG_READ;
}
@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ACK;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
while ((len = iov_iter_count(from)) > 0) {
size_t left;
@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
goto out;
}
- /* get a second commit callback */
- req->r_unsafe_callback = ceph_sync_write_unsafe;
req->r_inode = inode;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
ceph_vino(inode),
offset, length,
0, 1, op,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5e659d054b40..fd8f771f99b7 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rdcache_gen = 0;
ci->i_rdcache_revoking = 0;
- INIT_LIST_HEAD(&ci->i_unsafe_writes);
INIT_LIST_HEAD(&ci->i_unsafe_dirops);
INIT_LIST_HEAD(&ci->i_unsafe_iops);
spin_lock_init(&ci->i_unsafe_lock);
@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
return 1;
}
-void ceph_evict_inode(struct inode *inode)
-{
- /* wait unsafe sync writes */
- ceph_sync_write_wait(inode);
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
-}
-
static inline blkcnt_t calc_inode_blocks(u64 size)
{
return (size + (1<<9) - 1) >> 9;
@@ -1016,7 +1007,9 @@ out:
static void update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
- unsigned long from_time)
+ unsigned long from_time,
+ struct ceph_vino *tgt_vino,
+ struct ceph_vino *dir_vino)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
long unsigned duration = le32_to_cpu(lease->duration_ms);
@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
+ /*
+ * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
+ * we expect a negative dentry.
+ */
+ if (!tgt_vino && d_really_is_positive(dentry))
+ return;
+
+ if (tgt_vino && (d_really_is_negative(dentry) ||
+ !ceph_ino_compare(d_inode(dentry), tgt_vino)))
+ return;
+
spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, duration, ttl);
- /* make lease_rdcache_gen match directory */
dir = d_inode(dentry->d_parent);
+ /* make sure parent matches dir_vino */
+ if (!ceph_ino_compare(dir, dir_vino))
+ goto out_unlock;
+
/* only track leases on regular dentries */
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out_unlock;
@@ -1108,61 +1115,27 @@ out:
*
* Called with snap_rwsem (read).
*/
-int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
- struct ceph_mds_session *session)
+int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
{
+ struct ceph_mds_session *session = req->r_session;
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct inode *in = NULL;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
dout("fill_trace %p is_dentry %d is_target %d\n", req,
rinfo->head->is_dentry, rinfo->head->is_target);
-#if 0
- /*
- * Debugging hook:
- *
- * If we resend completed ops to a recovering mds, we get no
- * trace. Since that is very rare, pretend this is the case
- * to ensure the 'no trace' handlers in the callers behave.
- *
- * Fill in inodes unconditionally to avoid breaking cap
- * invariants.
- */
- if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
- pr_info("fill_trace faking empty trace on %lld %s\n",
- req->r_tid, ceph_mds_op_name(rinfo->head->op));
- if (rinfo->head->is_dentry) {
- rinfo->head->is_dentry = 0;
- err = fill_inode(req->r_locked_dir,
- &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1);
- }
- if (rinfo->head->is_target) {
- rinfo->head->is_target = 0;
- ininfo = rinfo->targeti.in;
- vino.ino = le64_to_cpu(ininfo->ino);
- vino.snap = le64_to_cpu(ininfo->snapid);
- in = ceph_get_inode(sb, vino);
- err = fill_inode(in, &rinfo->targeti, NULL,
- session, req->r_request_started,
- req->r_fmode);
- iput(in);
- }
- }
-#endif
-
if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
dout("fill_trace reply is empty!\n");
- if (rinfo->head->result == 0 && req->r_locked_dir)
+ if (rinfo->head->result == 0 && req->r_parent)
ceph_invalidate_dir_request(req);
return 0;
}
if (rinfo->head->is_dentry) {
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
if (dir) {
err = fill_inode(dir, NULL,
@@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dname.name = rinfo->dname;
dname.len = rinfo->dname_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
retry_lookup:
dn = d_lookup(parent, &dname);
dout("d_lookup on parent=%p name=%.*s got %p\n",
@@ -1206,8 +1179,8 @@ retry_lookup:
}
err = 0;
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1221,10 +1194,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
- in = ceph_get_inode(sb, vino);
+ in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
@@ -1233,8 +1206,8 @@ retry_lookup:
err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
- (!req->r_aborted && rinfo->head->result == 0) ?
- req->r_fmode : -1,
+ (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ rinfo->head->result == 0) ? req->r_fmode : -1,
&req->r_caps_reservation);
if (err < 0) {
pr_err("fill_inode badness %p %llx.%llx\n",
@@ -1247,8 +1220,9 @@ retry_lookup:
* ignore null lease/binding on snapdir ENOENT, or else we
* will have trouble splicing in the virtual snapdir later
*/
- if (rinfo->head->is_dentry && !req->r_aborted &&
- req->r_locked_dir &&
+ if (rinfo->head->is_dentry &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
@@ -1257,17 +1231,19 @@ retry_lookup:
* mknod symlink mkdir : null -> new inode
* unlink : linked -> null
*/
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
struct dentry *dn = req->r_dentry;
bool have_dir_cap, have_lease;
BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(d_inode(dn->d_parent) != dir);
- BUG_ON(ceph_ino(dir) !=
- le64_to_cpu(rinfo->diri.in->ino));
- BUG_ON(ceph_snap(dir) !=
- le64_to_cpu(rinfo->diri.in->snapid));
+
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ BUG_ON(ceph_ino(dir) != dvino.ino);
+ BUG_ON(ceph_snap(dir) != dvino.snap);
/* do we have a lease on the whole dir? */
have_dir_cap =
@@ -1319,12 +1295,13 @@ retry_lookup:
ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
- } else {
- if (have_lease && d_unhashed(dn))
+ } else if (have_lease) {
+ if (d_unhashed(dn))
d_add(dn, NULL);
update_dentry_lease(dn, rinfo->dlease,
session,
- req->r_request_started);
+ req->r_request_started,
+ NULL, &dvino);
}
goto done;
}
@@ -1347,15 +1324,19 @@ retry_lookup:
have_lease = false;
}
- if (have_lease)
+ if (have_lease) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
update_dentry_lease(dn, rinfo->dlease, session,
- req->r_request_started);
+ req->r_request_started,
+ &tvino, &dvino);
+ }
dout(" final dn %p\n", dn);
- } else if (!req->r_aborted &&
- (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
- req->r_op == CEPH_MDS_OP_MKSNAP)) {
+ } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP) &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
struct dentry *dn = req->r_dentry;
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
/* fill out a snapdir LOOKUPSNAP dentry */
BUG_ON(!dn);
@@ -1370,6 +1351,26 @@ retry_lookup:
goto done;
}
req->r_dentry = dn; /* may have spliced */
+ } else if (rinfo->head->is_dentry) {
+ struct ceph_vino *ptvino = NULL;
+
+ if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
+ le32_to_cpu(rinfo->dlease->duration_ms)) {
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ if (rinfo->head->is_target) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ ptvino = &tvino;
+ }
+
+ update_dentry_lease(req->r_dentry, rinfo->dlease,
+ session, req->r_request_started, ptvino,
+ &dvino);
+ } else {
+ dout("%s: no dentry lease or dir cap\n", __func__);
+ }
}
done:
dout("fill_trace done err=%d\n", err);
@@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
- if (req->r_aborted)
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
return readdir_prepopulate_inodes_only(req, session);
if (rinfo->hash_order && req->r_path2) {
@@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
/* FIXME: release caps/leases if error occurs */
for (i = 0; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
dname.name = rde->name;
dname.len = rde->name_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rde->inode.in->ino);
- vino.snap = le64_to_cpu(rde->inode.in->snapid);
+ tvino.ino = le64_to_cpu(rde->inode.in->ino);
+ tvino.snap = le64_to_cpu(rde->inode.in->snapid);
if (rinfo->hash_order) {
u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
@@ -1559,8 +1560,8 @@ retry_lookup:
goto out;
}
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1572,7 +1573,7 @@ retry_lookup:
if (d_really_is_positive(dn)) {
in = d_inode(dn);
} else {
- in = ceph_get_inode(parent->d_sb, vino);
+ in = ceph_get_inode(parent->d_sb, tvino);
if (IS_ERR(in)) {
dout("new_inode badness\n");
d_drop(dn);
@@ -1617,8 +1618,9 @@ retry_lookup:
ceph_dentry(dn)->offset = rde->offset;
+ dvino = ceph_vino(d_inode(parent));
update_dentry_lease(dn, rde->lease, req->r_session,
- req->r_request_started);
+ req->r_request_started, &tvino, &dvino);
if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn,
@@ -1632,7 +1634,7 @@ next_item:
}
out:
if (err == 0 && skipped == 0) {
- req->r_did_prepopulate = true;
+ set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
req->r_readdir_cache_idx = cache_ctl.index;
}
ceph_readdir_cache_release(&cache_ctl);
@@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work)
mutex_lock(&ci->i_truncate_mutex);
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 7d752d53353a..4c9c72f26eb9 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
l.stripe_count = ci->i_layout.stripe_count;
l.object_size = ci->i_layout.object_size;
l.data_pool = ci->i_layout.pool_id;
- l.preferred_osd = (s32)-1;
+ l.preferred_osd = -1;
if (copy_to_user(arg, &l, sizeof(l)))
return -EFAULT;
}
@@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
nl.data_pool = ci->i_layout.pool_id;
/* this is obsolete, and always -1 */
- nl.preferred_osd = le64_to_cpu(-1);
+ nl.preferred_osd = -1;
err = __validate_layout(mdsc, &nl);
if (err)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c9d2e553a6c4..c681762d76e6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
iput(req->r_inode);
}
- if (req->r_locked_dir)
- ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
erase_request(&mdsc->request_tree, req);
- if (req->r_unsafe_dir && req->r_got_unsafe) {
+ if (req->r_unsafe_dir &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
}
- if (req->r_target_inode && req->r_got_unsafe) {
+ if (req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_target_item);
@@ -668,6 +673,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
/*
+ * Walk back up the dentry tree until we hit a dentry representing a
+ * non-snapshot inode. We do this using the rcu_read_lock (which must be held
+ * when calling this) to ensure that the objects won't disappear while we're
+ * working with them. Once we hit a candidate dentry, we attempt to take a
+ * reference to it, and return that as the result.
+ */
+static struct inode *get_nonsnap_parent(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ while (dentry && !IS_ROOT(dentry)) {
+ inode = d_inode_rcu(dentry);
+ if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
+ break;
+ dentry = dentry->d_parent;
+ }
+ if (inode)
+ inode = igrab(inode);
+ return inode;
+}
+
+/*
* Choose mds to send request to next. If there is a hint set in the
* request (e.g., due to a prior forward hint from the mds), use that.
* Otherwise, consult frag tree and/or caps to identify the
@@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
- /*
- * we don't need to worry about protecting the d_parent access
- * here because we never renaming inside the snapped namespace
- * except to resplice to another snapdir, and either the old or new
- * result is a valid result.
- */
- while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
- dentry = dentry->d_parent;
- return dentry;
-}
-
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
int mode = req->r_direct_mode;
int mds = -1;
u32 hash = req->r_direct_hash;
- bool is_hash = req->r_direct_is_hash;
+ bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
/*
* is there a specific mds we should try? ignore hint if we have
@@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = NULL;
if (req->r_inode) {
inode = req->r_inode;
+ ihold(inode);
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
- struct dentry *parent = req->r_dentry->d_parent;
- struct inode *dir = d_inode(parent);
+ struct dentry *parent;
+ struct inode *dir;
+
+ rcu_read_lock();
+ parent = req->r_dentry->d_parent;
+ dir = req->r_parent ? : d_inode_rcu(parent);
- if (dir->i_sb != mdsc->fsc->sb) {
- /* not this fs! */
+ if (!dir || dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs or parent went negative */
inode = d_inode(req->r_dentry);
+ if (inode)
+ ihold(inode);
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn = get_nonsnap_parent(parent);
- inode = d_inode(dn);
+ inode = get_nonsnap_parent(parent);
dout("__choose_mds using nonsnap parent %p\n", inode);
} else {
/* dentry target */
inode = d_inode(req->r_dentry);
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
- inode = dir;
+ inode = igrab(dir);
hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
+ } else {
+ ihold(inode);
}
}
+ rcu_read_unlock();
}
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
@@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
(int)r, frag.ndist);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
/* since this file/dir wasn't known to be
@@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), frag.frag, mds);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
}
}
@@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ iput(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), mds,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
+out:
+ iput(inode);
return mds;
random:
@@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
if (ci->i_wrbuffer_ref > 0 &&
- ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
invalidate = true;
while (!list_empty(&ci->i_cap_flush_list)) {
@@ -1775,18 +1800,23 @@ retry:
return path;
}
-static int build_dentry_path(struct dentry *dentry,
+static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
int *pfreepath)
{
char *path;
- if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
- *pino = ceph_ino(d_inode(dentry->d_parent));
+ rcu_read_lock();
+ if (!dir)
+ dir = d_inode_rcu(dentry->d_parent);
+ if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ *pino = ceph_ino(dir);
+ rcu_read_unlock();
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
return 0;
}
+ rcu_read_unlock();
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode,
* an explicit ino+path.
*/
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
- const char *rpath, u64 rino,
- const char **ppath, int *pathlen,
+ struct inode *rdiri, const char *rpath,
+ u64 rino, const char **ppath, int *pathlen,
u64 *ino, int *freepath)
{
int r = 0;
@@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
- r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
+ r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
+ freepath);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
int ret;
ret = set_request_path_attr(req->r_inode, req->r_dentry,
- req->r_path1, req->r_ino1.ino,
+ req->r_parent, req->r_path1, req->r_ino1.ino,
&path1, &pathlen1, &ino1, &freepath1);
if (ret < 0) {
msg = ERR_PTR(ret);
@@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
}
ret = set_request_path_attr(NULL, req->r_old_dentry,
+ req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2);
if (ret < 0) {
@@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
mds, req->r_inode_drop, req->r_inode_unless, 0);
if (req->r_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_dentry,
- mds, req->r_dentry_drop, req->r_dentry_unless);
+ req->r_parent, mds, req->r_dentry_drop,
+ req->r_dentry_unless);
if (req->r_old_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
- mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+ req->r_old_dentry_dir, mds,
+ req->r_old_dentry_drop,
+ req->r_old_dentry_unless);
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
d_inode(req->r_old_dentry),
@@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
/*
* Replay. Do not regenerate message (and rebuild
@@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead = msg->front.iov_base;
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
- if (req->r_locked_dir)
+ if (req->r_parent)
flags |= CEPH_MDS_FLAG_WANT_DENTRY;
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
rhead->ino = 0;
- dout(" r_locked_dir = %p\n", req->r_locked_dir);
+ dout(" r_parent = %p\n", req->r_parent);
return 0;
}
@@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = 0;
- if (req->r_err || req->r_got_result) {
- if (req->r_aborted)
+ if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
goto out;
}
@@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc,
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout("do_request forced umount\n");
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
@@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts > 0)
continue; /* only new requests */
@@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
dout("do_request on %p\n", req);
- /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+ /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- if (req->r_locked_dir)
- ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
if (req->r_old_dentry_dir)
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
@@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
/* only abort if we didn't race with a real reply */
- if (req->r_got_result) {
+ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
err = le32_to_cpu(req->r_reply_info.head->result);
} else if (err < 0) {
dout("aborted request %lld with %d\n", req->r_tid, err);
@@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
*/
mutex_lock(&req->r_fill_mutex);
req->r_err = err;
- req->r_aborted = true;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
mutex_unlock(&req->r_fill_mutex);
- if (req->r_locked_dir &&
+ if (req->r_parent &&
(req->r_op & CEPH_MDS_OP_WRITE))
ceph_invalidate_dir_request(req);
} else {
@@ -2323,7 +2358,7 @@ out:
*/
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
{
- struct inode *inode = req->r_locked_dir;
+ struct inode *inode = req->r_parent;
dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
@@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
/* dup? */
- if ((req->r_got_unsafe && !head->safe) ||
- (req->r_got_safe && head->safe)) {
+ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
+ (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
pr_warn("got a dup %s reply on %llu from mds%d\n",
head->safe ? "safe" : "unsafe", tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
}
- if (req->r_got_safe) {
+ if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
@@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
- req->r_got_safe = true;
+ set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
__unregister_request(mdsc, req);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
/*
* We already handled the unsafe response, now do the
* cleanup. No need to examine the response; the MDS
@@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
@@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
goto out;
}
} else {
- req->r_got_unsafe = true;
+ set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
if (req->r_unsafe_dir) {
struct ceph_inode_info *ci =
@@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */
mutex_lock(&req->r_fill_mutex);
current->journal_info = req;
- err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+ err = ceph_fill_trace(mdsc->fsc->sb, req);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
req->r_op == CEPH_MDS_OP_LSSNAP))
@@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (realm)
ceph_put_snap_realm(mdsc, realm);
- if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
+ if (err == 0 && req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
@@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
out_err:
mutex_lock(&mdsc->mutex);
- if (!req->r_aborted) {
+ if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
if (err) {
req->r_err = err;
} else {
req->r_reply = ceph_msg_get(msg);
- req->r_got_result = true;
+ set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
}
} else {
dout("reply arrived after request %lld was aborted\n", tid);
@@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
goto out; /* dup reply? */
}
- if (req->r_aborted) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd) {
@@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
BUG_ON(req->r_err);
- BUG_ON(req->r_got_result);
+ BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
req->r_attempts = 0;
req->r_num_fwd = fwd_seq;
req->r_resend_mds = next_mds;
@@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts == 0)
continue; /* only old requests */
@@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
*/
static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
{
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
return atomic_read(&mdsc->num_sessions) <= skipped;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3c6f77b7bb02..ac0475a2daa7 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -202,9 +202,18 @@ struct ceph_mds_request {
char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
- struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+ struct inode *r_parent; /* parent dir inode */
struct inode *r_target_inode; /* resulting inode */
+#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
+#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
+#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */
+#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */
+#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */
+#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
+#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
+ unsigned long r_req_flags;
+
struct mutex r_fill_mutex;
union ceph_mds_request_args r_args;
@@ -216,7 +225,6 @@ struct ceph_mds_request {
/* for choosing which mds to send this request to */
int r_direct_mode;
u32 r_direct_hash; /* choose dir frag based on this dentry hash */
- bool r_direct_is_hash; /* true if r_direct_hash is valid */
/* data payload is used for xattr ops */
struct ceph_pagelist *r_pagelist;
@@ -234,7 +242,6 @@ struct ceph_mds_request {
struct ceph_mds_reply_info_parsed r_reply_info;
struct page *r_locked_page;
int r_err;
- bool r_aborted;
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */
@@ -262,9 +269,7 @@ struct ceph_mds_request {
ceph_mds_request_callback_t r_callback;
ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
- bool r_got_unsafe, r_got_safe, r_got_result;
- bool r_did_prepopulate;
long long r_dir_release_cnt;
long long r_dir_ordered_cnt;
int r_readdir_cache_idx;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6bd20d707bfd..0ec8d0114e57 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = {
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
- .evict_inode = ceph_evict_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
@@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages =
VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+ if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
+ fsc->mount_options->rsize >= PAGE_SIZE)
+ fsc->backing_dev_info.io_pages =
+ (fsc->mount_options->rsize + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
+ else if (fsc->mount_options->rsize == 0)
+ fsc->backing_dev_info.io_pages = ULONG_MAX;
+
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
atomic_long_inc_return(&bdi_seq));
if (!err)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3373b61faefd..e9410bcf4113 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -45,8 +45,8 @@
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
-#define CEPH_RSIZE_DEFAULT 0 /* max read size */
-#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
+#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -343,7 +343,6 @@ struct ceph_inode_info {
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
- struct list_head i_unsafe_writes; /* uncommitted sync writes */
struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
struct list_head i_unsafe_iops; /* uncommitted mds inode ops */
spinlock_t i_unsafe_lock;
@@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
}
/* what the mds thinks we want */
-extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
extern void ceph_caps_init(struct ceph_mds_client *mdsc);
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
@@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
-extern void ceph_evict_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
struct ceph_vino vino);
@@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec *ctime,
struct timespec *mtime, struct timespec *atime);
extern int ceph_fill_trace(struct super_block *sb,
- struct ceph_mds_request *req,
- struct ceph_mds_session *session);
+ struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session);
@@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ struct inode *dir,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
@@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
-extern void ceph_sync_write_wait(struct inode *inode);
+
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct file_operations ceph_snapdir_fops;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 98dc842e7245..aa3debbba826 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3282,7 +3282,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* sure that it doesn't change while being written back.
*/
static int
-cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
diff --git a/fs/dax.c b/fs/dax.c
index 3f1181563fb1..7436c98b92c8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -925,12 +925,11 @@ static int dax_insert_mapping(struct address_space *mapping,
/**
* dax_pfn_mkwrite - handle first write to DAX page
- * @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
*/
-int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
void *entry, **slot;
pgoff_t index = vmf->pgoff;
@@ -1119,20 +1118,10 @@ static int dax_fault_return(int error)
return VM_FAULT_SIGBUS;
}
-/**
- * dax_iomap_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @ops: iomap ops passed from the file system
- *
- * When a page fault occurs, filesystems may call this helper in their fault
- * or mkwrite handler for DAX files. Assumes the caller has done all the
- * necessary locking for the page fault to proceed successfully.
- */
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops)
+static int dax_iomap_pte_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
@@ -1205,11 +1194,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, sector,
- PAGE_SIZE, &entry, vma, vmf);
+ PAGE_SIZE, &entry, vmf->vma, vmf);
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error == -EBUSY)
error = 0;
@@ -1247,7 +1236,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
return vmf_ret;
}
-EXPORT_SYMBOL_GPL(dax_iomap_fault);
#ifdef CONFIG_FS_DAX_PMD
/*
@@ -1338,7 +1326,8 @@ fallback:
return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops)
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
{
struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
@@ -1446,5 +1435,34 @@ out:
trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
return result;
}
-EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+#else
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
+{
+ return VM_FAULT_FALLBACK;
+}
#endif /* CONFIG_FS_DAX_PMD */
+
+/**
+ * dax_iomap_fault - handle a page fault on a DAX file
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in
+ * their fault handler for DAX files. dax_iomap_fault() assumes the caller
+ * has done all the necessary locking for page fault to proceed
+ * successfully.
+ */
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ const struct iomap_ops *ops)
+{
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ return dax_iomap_pte_fault(vmf, ops);
+ case PE_SIZE_PMD:
+ return dax_iomap_pmd_fault(vmf, ops);
+ default:
+ return VM_FAULT_FALLBACK;
+ }
+}
+EXPORT_SYMBOL_GPL(dax_iomap_fault);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c87bae4376b8..a04ebea77de8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -587,7 +587,7 @@ static int dio_set_defer_completion(struct dio *dio)
/*
* Call into the fs to map some more disk blocks. We record the current number
* of available blocks at sdio->blocks_available. These are in units of the
- * fs blocksize, (1 << inode->i_blkbits).
+ * fs blocksize, i_blocksize(inode).
*
* The fs is allowed to map lots of blocks at once. If it wants to do that,
* it uses the passed inode-relative block number as the file offset, as usual.
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 866bb18efefe..e00d45af84ea 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -123,7 +123,7 @@ void ecryptfs_destroy_kthread(void)
* @lower_dentry: Lower dentry for file to open
* @lower_mnt: Lower vfsmount for file to open
*
- * This function gets a r/w file opened againt the lower dentry.
+ * This function gets a r/w file opened against the lower dentry.
*
* Returns zero on success; non-zero otherwise
*/
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bcb68fcc8445..5ec16313da1a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1895,7 +1895,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
* Also, we do not currently supported nested exclusive wakeups.
*/
- if (epds.events & EPOLLEXCLUSIVE) {
+ if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
if (op == EPOLL_CTL_MOD)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index b0f241528a30..b21891a6bfca 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -87,19 +87,19 @@ out_unlock:
* The default page_lock and i_size verification done by non-DAX fault paths
* is sufficient because ext2 doesn't support hole punching.
*/
-static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext2_dax_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
down_read(&ei->dax_sem);
- ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops);
up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
@@ -107,16 +107,15 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return ret;
}
-static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
loff_t size;
int ret;
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
down_read(&ei->dax_sem);
/* check that the faulting page hasn't raced with truncate */
@@ -124,7 +123,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
@@ -134,7 +133,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
/*
- * .pmd_fault is not supported for DAX because allocation in ext2
+ * .huge_fault is not supported for DAX because allocation in ext2
* cannot be reliably aligned to huge page sizes and so pmd faults
* will always fail and fail back to regular faults.
*/
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cee23b684f47..2fd17e8e4984 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2483,8 +2483,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_page_mkwrite(struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 37e059202cd2..e7f12a204cbc 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -84,7 +84,7 @@
* -- writeout
* Writeout looks up whole page cache to see if a buffer is
* mapped, If there are not very many delayed buffers, then it is
- * time comsuming.
+ * time consuming.
*
* With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
* bigalloc and writeout can figure out if a block or a range of
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 13021a054fc0..8210c1f43556 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -253,19 +253,20 @@ out:
}
#ifdef CONFIG_FS_DAX
-static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ext4_dax_huge_fault(struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
int result;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+ result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
up_read(&EXT4_I(inode)->i_mmap_sem);
if (write)
sb_end_pagefault(sb);
@@ -273,25 +274,9 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return result;
}
-static int
-ext4_dax_pmd_fault(struct vm_fault *vmf)
+static int ext4_dax_fault(struct vm_fault *vmf)
{
- int result;
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct super_block *sb = inode->i_sb;
- bool write = vmf->flags & FAULT_FLAG_WRITE;
-
- if (write) {
- sb_start_pagefault(sb);
- file_update_time(vmf->vma->vm_file);
- }
- down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- if (write)
- sb_end_pagefault(sb);
-
- return result;
+ return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}
/*
@@ -303,22 +288,21 @@ ext4_dax_pmd_fault(struct vm_fault *vmf)
* wp_pfn_shared() fails. Thus fault gets retried and things work out as
* desired.
*/
-static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
loff_t size;
int ret;
sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
@@ -327,7 +311,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
- .pmd_fault = ext4_dax_pmd_fault,
+ .huge_fault = ext4_dax_huge_fault,
.page_mkwrite = ext4_dax_fault,
.pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 75212a6e69f8..971f66342080 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2221,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
{
struct inode *inode = mpd->inode;
int err;
- ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+ ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
do {
@@ -3577,7 +3577,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
- round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
+ round_down(offset, i_blocksize(inode)) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
} else if (is_sync_kiocb(iocb)) {
@@ -5179,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
* do. We do the check mainly to optimize the common PAGE_SIZE ==
* blocksize case
*/
- if (offset > PAGE_SIZE - (1 << inode->i_blkbits))
+ if (offset > PAGE_SIZE - i_blocksize(inode))
return;
while (1) {
page = find_lock_page(inode->i_mapping,
@@ -5821,8 +5821,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
return !buffer_mapped(bh);
}
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
loff_t size;
unsigned long len;
@@ -5912,13 +5913,13 @@ out:
return ret;
}
-int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int ext4_filemap_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int err;
down_read(&EXT4_I(inode)->i_mmap_sem);
- err = filemap_fault(vma, vmf);
+ err = filemap_fault(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
return err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 10c62de642c6..354dc1a894c2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -838,7 +838,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
inode = page->mapping->host;
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
blocks_per_page = PAGE_SIZE / blocksize;
groups_per_page = blocks_per_page >> 1;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 6fc14def0c70..578f8c33fb44 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
if (PageUptodate(page))
return 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 49f10dce817d..1edc86e874e3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -32,11 +32,10 @@
#include "trace.h"
#include <trace/events/f2fs.h>
-static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
int err;
@@ -58,7 +57,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_balance_fs(sbi, dn.node_changed);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2401c5dabb2a..e80bfd06daf5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2043,12 +2043,12 @@ static void fuse_vma_close(struct vm_area_struct *vma)
* - sync(2)
* - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
*/
-static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int fuse_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
lock_page(page);
if (page->mapping != inode->i_mapping) {
unlock_page(page);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 016c11eaca7c..6fe2a59c6a9a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -379,10 +379,10 @@ static int gfs2_allocate_page_backing(struct page *page)
* blocks allocated on disk to back that page.
*/
-static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int gfs2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
@@ -399,7 +399,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out;
- gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE);
+ gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -407,7 +407,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out_uninit;
/* Update file times before taking page lock */
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index a3ec3ae7d347..482081bcdf70 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFS_SB(sb)->session >= 0) {
te.cdte_track = HFS_SB(sb)->session;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index ebb85e5f6549..e254fa0f0697 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFSPLUS_SB(sb)->session >= 0) {
te.cdte_track = HFSPLUS_SB(sb)->session;
diff --git a/fs/iomap.c b/fs/iomap.c
index d89f70bbb952..0f85f2410605 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -420,8 +420,8 @@ int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops)
{
- unsigned blocksize = (1 << inode->i_blkbits);
- unsigned off = pos & (blocksize - 1);
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int off = pos & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!off)
@@ -445,11 +445,10 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
return length;
}
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
- const struct iomap_ops *ops)
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
unsigned long length;
loff_t offset, size;
ssize_t ret;
@@ -736,9 +735,9 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
struct iomap_dio *dio = data;
- unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
- unsigned fs_block_size = (1 << inode->i_blkbits), pad;
- unsigned align = iov_iter_alignment(dio->submit.iter);
+ unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+ unsigned int fs_block_size = i_blocksize(inode), pad;
+ unsigned int align = iov_iter_alignment(dio->submit.iter);
struct iov_iter iter;
struct bio *bio;
bool need_zeroout = false;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2be7c9ce6663..c64c2574a0aa 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
sb->s_blocksize - offset : toread;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 0);
if (err)
return err;
@@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 439b946c4808..db5900aaa55a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -478,7 +478,7 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, 1, _RET_IP_);
}
- kernfs_unmap_bin_file(kn);
+ kernfs_drain_open_files(kn);
mutex_lock(&kernfs_mutex);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 78219d5644e9..35043a8c4529 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -348,9 +348,9 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
kernfs_put_active(of->kn);
}
-static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kernfs_vma_fault(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct kernfs_open_file *of = kernfs_of(file);
int ret;
@@ -362,16 +362,15 @@ static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
if (of->vm_ops->fault)
- ret = of->vm_ops->fault(vma, vmf);
+ ret = of->vm_ops->fault(vmf);
kernfs_put_active(of->kn);
return ret;
}
-static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct kernfs_open_file *of = kernfs_of(file);
int ret;
@@ -383,7 +382,7 @@ static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
ret = 0;
if (of->vm_ops->page_mkwrite)
- ret = of->vm_ops->page_mkwrite(vma, vmf);
+ ret = of->vm_ops->page_mkwrite(vmf);
else
file_update_time(file);
@@ -516,7 +515,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
goto out_put;
rc = 0;
- of->mmapped = 1;
+ of->mmapped = true;
of->vm_ops = vma->vm_ops;
vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -708,7 +707,8 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
if (error)
goto err_free;
- ((struct seq_file *)file->private_data)->private = of;
+ of->seq_file = file->private_data;
+ of->seq_file->private = of;
/* seq_file clears PWRITE unconditionally, restore it if WRITE */
if (file->f_mode & FMODE_WRITE)
@@ -717,13 +717,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
/* make sure we have open node struct */
error = kernfs_get_open_node(kn, of);
if (error)
- goto err_close;
+ goto err_seq_release;
+
+ if (ops->open) {
+ /* nobody has access to @of yet, skip @of->mutex */
+ error = ops->open(of);
+ if (error)
+ goto err_put_node;
+ }
/* open succeeded, put active references */
kernfs_put_active(kn);
return 0;
-err_close:
+err_put_node:
+ kernfs_put_open_node(kn, of);
+err_seq_release:
seq_release(inode, file);
err_free:
kfree(of->prealloc_buf);
@@ -733,11 +742,41 @@ err_out:
return error;
}
+/* used from release/drain to ensure that ->release() is called exactly once */
+static void kernfs_release_file(struct kernfs_node *kn,
+ struct kernfs_open_file *of)
+{
+ /*
+ * @of is guaranteed to have no other file operations in flight and
+ * we just want to synchronize release and drain paths.
+ * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * here because drain path may be called from places which can
+ * cause circular dependency.
+ */
+ lockdep_assert_held(&kernfs_open_file_mutex);
+
+ if (!of->released) {
+ /*
+ * A file is never detached without being released and we
+ * need to be able to release files which are deactivated
+ * and being drained. Don't use kernfs_ops().
+ */
+ kn->attr.ops->release(of);
+ of->released = true;
+ }
+}
+
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
struct kernfs_open_file *of = kernfs_of(filp);
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ mutex_lock(&kernfs_open_file_mutex);
+ kernfs_release_file(kn, of);
+ mutex_unlock(&kernfs_open_file_mutex);
+ }
+
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
@@ -746,12 +785,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}
-void kernfs_unmap_bin_file(struct kernfs_node *kn)
+void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
- if (!(kn->flags & KERNFS_HAS_MMAP))
+ if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
spin_lock_irq(&kernfs_open_node_lock);
@@ -763,10 +802,16 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn)
return;
mutex_lock(&kernfs_open_file_mutex);
+
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ if (kn->flags & KERNFS_HAS_MMAP)
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ kernfs_release_file(kn, of);
}
+
mutex_unlock(&kernfs_open_file_mutex);
kernfs_put_open_node(kn, NULL);
@@ -965,6 +1010,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
kn->flags |= KERNFS_HAS_SEQ_SHOW;
if (ops->mmap)
kn->flags |= KERNFS_HAS_MMAP;
+ if (ops->release)
+ kn->flags |= KERNFS_HAS_RELEASE;
rc = kernfs_add_one(kn);
if (rc) {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index bfd551bbf231..3100987cf8ba 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -104,7 +104,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;
-void kernfs_unmap_bin_file(struct kernfs_node *kn);
+void kernfs_drain_open_files(struct kernfs_node *kn);
/*
* symlink.c
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1c13dd80744f..7e4ea3b9f472 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
diff --git a/fs/mpage.c b/fs/mpage.c
index 28af984a3d96..baff8f820c29 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -115,7 +115,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
SetPageUptodate(page);
return;
}
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
}
head = page_buffers(page);
page_bh = head;
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 39f57bef8531..0c3905e0542e 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -27,10 +27,9 @@
* XXX: how are we excluding truncate/invalidate here? Maybe need to lock
* page?
*/
-static int ncp_file_mmap_fault(struct vm_area_struct *area,
- struct vm_fault *vmf)
+static int ncp_file_mmap_fault(struct vm_fault *vmf)
{
- struct inode *inode = file_inode(area->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
char *pg_addr;
unsigned int already_read;
unsigned int count;
@@ -90,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
* -- nyc
*/
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
return VM_FAULT_MAJOR;
}
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index f32f272ee501..97b111d79489 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -525,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
return result;
}
if (result > len) {
- pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
+ pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len);
return -EIO;
}
return result;
@@ -619,7 +619,7 @@ skipdata:;
goto skipdata2;
}
if (datalen > req->datalen + 8) {
- pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
+ pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8);
server->rcv.state = 3;
goto skipdata;
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 2905479f214a..0ca370d23ddb 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -381,7 +381,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
struct blk_plug plug;
int i;
- dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+ dprintk("%s enter, %zu@%lld\n", __func__, count, offset);
/* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index eb094c6011d8..fd0284c1dc32 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 26dbe8b0c10d..668213984d68 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -528,10 +528,10 @@ const struct address_space_operations nfs_file_aops = {
* writable, implying that someone is about to modify the page through a
* shared-writable mapping
*/
-static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
int ret = VM_FAULT_NOPAGE;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a3fc48ba4931..18f98e08544d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -482,7 +482,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
u32 j, idx;
struct nfs_fh *fh;
- dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+ dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
@@ -540,7 +540,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
if (IS_ERR(ds_clnt))
return PNFS_NOT_ATTEMPTED;
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+ dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 0ca4af8cca5d..d6acc688df7e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1751,7 +1751,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
int vers;
struct nfs_fh *fh;
- dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+ dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
@@ -1828,7 +1828,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
vers = nfs4_ff_layout_ds_version(lseg, idx);
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n",
+ dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
vers);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 2a4cdce939a0..8f3d2acb81c3 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -291,7 +291,7 @@ objlayout_read_pagelist(struct nfs_pgio_header *hdr)
&hdr->args.pgbase,
hdr->args.offset, hdr->args.count);
- dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
+ dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
__func__, inode->i_ino, offset, count, hdr->res.eof);
err = objio_read_pagelist(hdr);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index a06115e31612..92b4b41d19d2 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -24,7 +24,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
- u32 block_size = (1 << inode->i_blkbits);
+ u32 block_size = i_blocksize(inode);
struct pnfs_block_extent *bex;
struct iomap iomap;
u32 device_generation = 0;
@@ -181,7 +181,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
int nr_iomaps;
nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
@@ -375,7 +375,7 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
int nr_iomaps;
nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 43e109cc0ccc..e71f11b1a180 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1102,6 +1102,7 @@ static struct flags {
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
+ { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index d08cd88155c7..838f90f3f890 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 0c890347cde3..dcb5f79076c0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d818e4ffd79f..045c9081eabe 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nfserr = nfsd_write(rqstp, &resp->fh, NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &resp->committed);
+ nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &cnt, resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78109d666c..0274db6e65d0 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, length + 4);
if (unlikely(p == NULL))
goto out_overflow;
+ p += XDR_QUADLEN(length);
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
- struct nfs4_sessionid id;
- int status;
+ int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
- status = -ESERVERFAULT;
-
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
- memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, session->se_sessionid.data,
- NFS4_MAX_SESSIONID_LEN) != 0) {
+
+ if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
@@ -753,6 +750,14 @@ int set_callback_cred(void)
return 0;
}
+void cleanup_callback_cred(void)
+{
+ if (callback_cred) {
+ put_rpccred(callback_cred);
+ callback_cred = NULL;
+ }
+}
+
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b20577dcdd2..6b9b6cca469f 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
*uid = make_kuid(&init_user_ns, id);
if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
*gid = make_kgid(&init_user_ns, id);
if (!gid_valid(*gid))
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 74a6e573e061..cbeeda1e94a2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
+ struct svc_export *exp = cstate->current_fh.fh_export;
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
+ if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+ !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+ return nfserr_attrnotsupp;
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- &write->wr_how_written);
+ write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ /* ac_supported, ac_resp_access */
+ return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
+
static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
+
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
+
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+ * sizeof(__be32);
+}
+
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+ (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
+
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
}
#ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+
+ return (op_encode_hdr_size +
+ 1 /* gd_layout_type*/ +
+ XDR_QUADLEN(rlen) +
+ 2 /* gd_notify_types */) * sizeof(__be32);
+}
+
/*
* At this stage we don't really know what layout driver will handle the request,
* so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
}
#endif /* CONFIG_NFSD_PNFS */
+
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
+
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUPP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_TEST_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_test_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_TEST_STATEID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
},
};
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- struct nfsd4_operation *opdesc;
- nfsd4op_rsize estimator;
-
if (op->opnum == OP_ILLEGAL)
return op_encode_hdr_size * sizeof(__be32);
- opdesc = OPDESC(op);
- estimator = opdesc->op_rsize_bop;
- return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+
+ BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+ return OPDESC(op)->op_rsize_bop(rqstp, op);
}
void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2476,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = {
};
struct svc_version nfsd_version4 = {
- .vs_vers = 4,
- .vs_nproc = 2,
- .vs_proc = nfsd_procedures4,
- .vs_dispatch = nfsd_dispatch,
- .vs_xdrsize = NFS4_SVC_XDRSIZE,
- .vs_rpcb_optnl = 1,
+ .vs_vers = 4,
+ .vs_nproc = 2,
+ .vs_proc = nfsd_procedures4,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS4_SVC_XDRSIZE,
+ .vs_rpcb_optnl = true,
+ .vs_need_cong_ctrl = true,
};
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a0dee8ae9f97..e9ef50addddb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
- dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+ dprintk("NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
ret = set_callback_cred();
if (ret)
- return -ENOMEM;
+ return ret;
+
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
- goto out_recovery;
+ goto out_cleanup_cred;
}
ret = nfsd4_create_callback_queue();
if (ret)
goto out_free_laundry;
set_max_delegations();
-
return 0;
out_free_laundry:
destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+ cleanup_callback_cred();
return ret;
}
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
+ cleanup_callback_cred();
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8fae53ce21d1..382c1fd05b4c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -58,7 +58,7 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
READ_BUF(16);
p = xdr_decode_hyper(p, &write->wr_offset);
write->wr_stable_how = be32_to_cpup(p++);
- if (write->wr_stable_how > 2)
+ if (write->wr_stable_how > NFS_FILE_SYNC)
goto xdr_error;
write->wr_buflen = be32_to_cpup(p++);
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
} else
max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
- * OP_LOCK may return a conflicting lock. (Special case
- * because it will just skip encoding this if it runs
- * out of xdr buffer space, and it is the only operation
- * that behaves this way.)
+ * OP_LOCK and OP_LOCKT may return a conflicting lock.
+ * (Special case because it will just skip encoding this
+ * if it runs out of xdr buffer space, and it is the only
+ * operation that behaves this way.)
*/
- if (op->opnum == OP_LOCK)
+ if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
DECODE_TAIL;
}
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ struct svc_export *exp)
{
- if (IS_I_VERSION(inode)) {
+ if (exp->ex_flags & NFSEXP_V4ROOT) {
+ *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+ *p++ = 0;
+ } else if (IS_I_VERSION(inode)) {
p = xdr_encode_hyper(p, inode->i_version);
} else {
*p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- err = security_inode_getsecctx(d_inode(dentry),
+ if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+ err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
+ else
+ err = -EOPNOTSUPP;
contextsupport = (err == 0);
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
- p = encode_change(p, &stat, d_inode(dentry));
+ p = encode_change(p, &stat, d_inode(dentry), exp);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
p = xdr_reserve_space(xdr, 8);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d6b97b424ad1..96fd15979cbd 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -578,7 +578,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
struct kvec *vec = &rqstp->rq_res.head[0];
if (vec->iov_len + data->iov_len > PAGE_SIZE) {
- printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+ printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
data->iov_len);
return 0;
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f3b2f34b10a3..73e75ac90525 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -536,6 +536,19 @@ out_free:
return rv;
}
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+ unsigned vers, unsigned minor)
+{
+ const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+ bool supported = !!nfsd_vers(vers, NFSD_TEST);
+
+ if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+ supported = false;
+ return snprintf(buf, remaining, format, sep,
+ supported ? '+' : '-', vers, minor);
+}
+
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, vers, size);
if (len <= 0) return -EINVAL;
do {
+ enum vers_op cmd;
sign = *vers;
if (sign == '+' || sign == '-')
num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (*minorp == '.') {
if (num != 4)
return -EINVAL;
- minor = simple_strtoul(minorp+1, NULL, 0);
- if (minor == 0)
- return -EINVAL;
- if (nfsd_minorversion(minor, sign == '-' ?
- NFSD_CLEAR : NFSD_SET) < 0)
+ if (kstrtouint(minorp+1, 0, &minor) < 0)
return -EINVAL;
- goto next;
- }
+ } else
+ minor = 0;
+ cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
case 2:
case 3:
- case 4:
- nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+ nfsd_vers(num, cmd);
break;
+ case 4:
+ if (nfsd_minorversion(minor, cmd) >= 0)
+ break;
default:
return -EINVAL;
}
- next:
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
/* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = 0;
sep = "";
remaining = SIMPLE_TRANSACTION_LIMIT;
- for (num=2 ; num <= 4 ; num++)
- if (nfsd_vers(num, NFSD_AVAIL)) {
- len = snprintf(buf, remaining, "%s%c%d", sep,
- nfsd_vers(num, NFSD_TEST)?'+':'-',
- num);
- sep = " ";
-
- if (len >= remaining)
- break;
- remaining -= len;
- buf += len;
- tlen += len;
- }
- if (nfsd_vers(4, NFSD_AVAIL))
- for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
- minor++) {
- len = snprintf(buf, remaining, " %c4.%u",
- (nfsd_vers(4, NFSD_TEST) &&
- nfsd_minorversion(minor, NFSD_TEST)) ?
- '+' : '-',
- minor);
-
+ for (num=2 ; num <= 4 ; num++) {
+ if (!nfsd_vers(num, NFSD_AVAIL))
+ continue;
+ minor = 0;
+ do {
+ len = nfsd_print_version_support(buf, remaining,
+ sep, num, minor);
if (len >= remaining)
- break;
+ goto out;
remaining -= len;
buf += len;
tlen += len;
- }
-
+ minor++;
+ sep = " ";
+ } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+ }
+out:
len = snprintf(buf, remaining, "\n");
if (len >= remaining)
return -EINVAL;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d74c8c44dc35..d96606801d47 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
{
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5c5a79..fa82b7707e85 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
struct nfsd_attrstat *resp)
{
__be32 nfserr;
- int stable = 1;
unsigned long cnt = argp->len;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &stable);
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+ rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e6bfd96734c0..efd66da99201 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
return 0;
}
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+ unsigned i;
+
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+ if (nfsd_supported_minorversions[i])
+ return;
+ }
+ nfsd_vers(4, NFSD_CLEAR);
+}
+
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
switch(change) {
case NFSD_SET:
nfsd_supported_minorversions[minorversion] = true;
+ nfsd_vers(4, NFSD_SET);
break;
case NFSD_CLEAR:
nfsd_supported_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4();
break;
case NFSD_TEST:
return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4516e8b7d776..005c911b34ac 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 26c6fdb4bf67..19d50f600e8d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int *stablep)
+ unsigned long *cnt, int stable)
{
struct svc_export *exp;
- struct inode *inode;
mm_segment_t oldfs;
__be32 err = 0;
int host_err;
- int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
*/
current->flags |= PF_LESS_THROTTLE;
- inode = file_inode(file);
- exp = fhp->fh_export;
-
+ exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
- stable = 0;
+ stable = NFS_UNSTABLE;
if (stable && !use_wgather)
flags |= RWF_SYNC;
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
- __be32 err = 0;
+ struct file *file = NULL;
+ __be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen);
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
- stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- } else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
- if (err)
- goto out;
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ if (err)
+ goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- if (cnt)
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
- cnt, stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- fput(file);
- }
+ trace_write_opened(rqstp, fhp, offset, vlen);
+ err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
+ fput(file);
out:
trace_write_done(rqstp, fhp, offset, vlen);
return err;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 0bf9e7bf5800..db98c48c735a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
- loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+ struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep);
+ int stable);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 2c90e285d7c6..03b8ba933eb2 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -34,7 +34,7 @@
static inline unsigned long
nilfs_palloc_groups_per_desc_block(const struct inode *inode)
{
- return (1UL << inode->i_blkbits) /
+ return i_blocksize(inode) /
sizeof(struct nilfs_palloc_group_desc);
}
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index d5c23da43513..c21e0b4454a6 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -50,7 +50,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
brelse(bh);
BUG();
}
- memset(bh->b_data, 0, 1 << inode->i_blkbits);
+ memset(bh->b_data, 0, i_blocksize(inode));
bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 2e315f9f2e51..06ffa135dfa6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -119,7 +119,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
{
- return 1 << btree->b_inode->i_blkbits;
+ return i_blocksize(btree->b_inode);
}
static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
@@ -1870,7 +1870,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
di = &dreq;
ni = NULL;
} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
- 1 << btree->b_inode->i_blkbits)) {
+ nilfs_btree_node_size(btree))) {
di = &dreq;
ni = &nreq;
} else {
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 547381f3ce13..c5fa3dee72fc 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -51,8 +51,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return err;
}
-static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int nilfs_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c7f4fef9ebf5..7ffe71a8dfb9 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -51,7 +51,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_add_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_add(n, &root->blocks_count);
}
@@ -60,7 +60,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_sub_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_sub(n, &root->blocks_count);
}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index d56d3a5bea88..98835ed6bef4 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -57,7 +57,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
set_buffer_mapped(bh);
kaddr = kmap_atomic(bh->b_page);
- memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
+ memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
if (init_block)
init_block(inode, bh, kaddr);
flush_dcache_page(bh->b_page);
@@ -501,7 +501,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
mi->mi_entry_size = entry_size;
- mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
+ mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bedcae2c28e6..7d18d62e8e07 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -723,7 +723,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
lock_page(page);
if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
unlock_page(page);
bh = head = page_buffers(page);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 11556b7d93ec..88a31e9340a0 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -608,7 +608,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
unsigned int block_end, block_start;
- unsigned int bsize = 1 << inode->i_blkbits;
+ unsigned int bsize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, bsize, 0);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 7025d8c27999..3e04279446e8 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2924,7 +2924,7 @@ again:
/*
* if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
* another try; otherwise, we are sure the MIGRATING state is there,
- * drop the unneded state which blocked threads trying to DIRTY
+ * drop the unneeded state which blocked threads trying to DIRTY
*/
spin_lock(&res->spinlock);
BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7b6a146327d7..8836305eb378 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* We know that zero_from is block aligned */
for (block_start = zero_from; block_start < zero_to;
block_start = block_end) {
- block_end = block_start + (1 << inode->i_blkbits);
+ block_end = block_start + i_blocksize(inode);
/*
* block_start is block-aligned. Bump it by one to force
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 429088786e93..098f5c712569 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,17 +44,18 @@
#include "ocfs2_trace.h"
-static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+static int ocfs2_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
sigset_t oldset;
int ret;
ocfs2_block_signals(&oldset);
- ret = filemap_fault(area, vmf);
+ ret = filemap_fault(vmf);
ocfs2_unblock_signals(&oldset);
- trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno,
- area, vmf->page, vmf->pgoff);
+ trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
+ vma, vmf->page, vmf->pgoff);
return ret;
}
@@ -127,10 +128,10 @@ out:
return ret;
}
-static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ocfs2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct buffer_head *di_bh = NULL;
sigset_t oldset;
int ret;
@@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
+ ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index b0ced669427e..c4ab6fdf17a0 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -400,8 +400,9 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
/* remove the op from the in progress hash table */
op = orangefs_devreq_remove_op(head.tag);
if (!op) {
- gossip_err("WARNING: No one's waiting for tag %llu\n",
- llu(head.tag));
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: No one's waiting for tag %llu\n",
+ __func__, llu(head.tag));
return ret;
}
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 551bc74ed2b8..5cd617980fbf 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -136,12 +136,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
return -EINVAL;
}
-struct backing_dev_info orangefs_backing_dev_info = {
- .name = "orangefs",
- .ra_pages = 0,
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
/** ORANGEFS2 implementation of address space operations */
const struct address_space_operations orangefs_address_operations = {
.readpage = orangefs_readpage,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 75375e90a63f..6333cbbdfef7 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -344,6 +344,11 @@ int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
user_desc->size,
user_desc->count);
+ if (user_desc->total_size < 0 ||
+ user_desc->size < 0 ||
+ user_desc->count < 0)
+ goto out;
+
/*
* sanity check alignment and size of buffer that caller wants to
* work with
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 27e75cf28b3a..791912da97d7 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -967,13 +967,13 @@ int orangefs_debugfs_new_client_string(void __user *arg)
int ret;
ret = copy_from_user(&client_debug_array_string,
- (void __user *)arg,
- ORANGEFS_MAX_DEBUG_STRING_LEN);
+ (void __user *)arg,
+ ORANGEFS_MAX_DEBUG_STRING_LEN);
if (ret != 0) {
pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
__func__);
- return -EIO;
+ return -EFAULT;
}
/*
@@ -988,17 +988,18 @@ int orangefs_debugfs_new_client_string(void __user *arg)
*/
client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
'\0';
-
+
pr_info("%s: client debug array string has been received.\n",
__func__);
if (!help_string_initialized) {
/* Build a proper debug help string. */
- if (orangefs_prepare_debugfs_help_string(0)) {
+ ret = orangefs_prepare_debugfs_help_string(0);
+ if (ret) {
gossip_err("%s: no debug help string \n",
__func__);
- return -EIO;
+ return ret;
}
}
@@ -1011,7 +1012,7 @@ int orangefs_debugfs_new_client_string(void __user *arg)
help_string_initialized++;
- return ret;
+ return 0;
}
int orangefs_debugfs_new_debug(void __user *arg)
diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h
index a3d84ffee905..f380f9ed1b28 100644
--- a/fs/orangefs/orangefs-dev-proto.h
+++ b/fs/orangefs/orangefs-dev-proto.h
@@ -50,8 +50,7 @@
* Misc constants. Please retain them as multiples of 8!
* Otherwise 32-64 bit interactions will be messed up :)
*/
-#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400
-#define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800
+#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800
/*
* The maximum number of directory entries in a single request is 96.
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 3bf803d732c5..70355a9a2596 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -529,7 +529,6 @@ extern spinlock_t orangefs_htable_ops_in_progress_lock;
extern int hash_table_size;
extern const struct address_space_operations orangefs_address_operations;
-extern struct backing_dev_info orangefs_backing_dev_info;
extern const struct inode_operations orangefs_file_inode_operations;
extern const struct file_operations orangefs_file_operations;
extern const struct inode_operations orangefs_symlink_inode_operations;
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 4113eb0495bf..c1b5174cb5a9 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -80,11 +80,6 @@ static int __init orangefs_init(void)
int ret = -1;
__u32 i = 0;
- ret = bdi_init(&orangefs_backing_dev_info);
-
- if (ret)
- return ret;
-
if (op_timeout_secs < 0)
op_timeout_secs = 0;
@@ -94,7 +89,7 @@ static int __init orangefs_init(void)
/* initialize global book keeping data structures */
ret = op_cache_initialize();
if (ret < 0)
- goto err;
+ goto out;
ret = orangefs_inode_cache_initialize();
if (ret < 0)
@@ -181,9 +176,6 @@ cleanup_inode:
cleanup_op:
op_cache_finalize();
-err:
- bdi_destroy(&orangefs_backing_dev_info);
-
out:
return ret;
}
@@ -207,8 +199,6 @@ static void __exit orangefs_exit(void)
kfree(orangefs_htable_ops_in_progress);
- bdi_destroy(&orangefs_backing_dev_info);
-
pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION);
}
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index 084954448f18..afd2f523b283 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -91,6 +91,13 @@
* Description:
* Readahead cache buffer count and size.
*
+ * What: /sys/fs/orangefs/readahead_readcnt
+ * Date: Jan 2017
+ * Contact: Martin Brandenburg <martin@omnibond.com>
+ * Description:
+ * Number of buffers (in multiples of readahead_size)
+ * which can be read ahead for a single file at once.
+ *
* What: /sys/fs/orangefs/acache/...
* Date: Jun 2015
* Contact: Martin Brandenburg <martin@omnibond.com>
@@ -329,7 +336,8 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
(!strcmp(attr->attr.name, "readahead_count") ||
!strcmp(attr->attr.name, "readahead_size") ||
- !strcmp(attr->attr.name, "readahead_count_size"))) {
+ !strcmp(attr->attr.name, "readahead_count_size") ||
+ !strcmp(attr->attr.name, "readahead_readcnt"))) {
rc = -EINVAL;
goto out;
}
@@ -360,6 +368,11 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj,
"readahead_count_size"))
new_op->upcall.req.param.op =
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE;
+
+ else if (!strcmp(attr->attr.name,
+ "readahead_readcnt"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
} else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
if (!strcmp(attr->attr.name, "timeout_msecs"))
new_op->upcall.req.param.op =
@@ -542,7 +555,8 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) &&
(!strcmp(attr->attr.name, "readahead_count") ||
!strcmp(attr->attr.name, "readahead_size") ||
- !strcmp(attr->attr.name, "readahead_count_size"))) {
+ !strcmp(attr->attr.name, "readahead_count_size") ||
+ !strcmp(attr->attr.name, "readahead_readcnt"))) {
rc = -EINVAL;
goto out;
}
@@ -609,6 +623,15 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj,
new_op->upcall.req.param.u.value32[0] = val1;
new_op->upcall.req.param.u.value32[1] = val2;
goto value_set;
+ } else if (!strcmp(attr->attr.name,
+ "readahead_readcnt")) {
+ if ((val >= 0)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT;
+ } else {
+ rc = 0;
+ goto out;
+ }
}
} else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) {
@@ -812,6 +835,10 @@ static struct orangefs_attribute readahead_count_size_attribute =
__ATTR(readahead_count_size, 0664, sysfs_service_op_show,
sysfs_service_op_store);
+static struct orangefs_attribute readahead_readcnt_attribute =
+ __ATTR(readahead_readcnt, 0664, sysfs_service_op_show,
+ sysfs_service_op_store);
+
static struct orangefs_attribute perf_counter_reset_attribute =
__ATTR(perf_counter_reset,
0664,
@@ -838,6 +865,7 @@ static struct attribute *orangefs_default_attrs[] = {
&readahead_count_attribute.attr,
&readahead_size_attribute.attr,
&readahead_count_size_attribute.attr,
+ &readahead_readcnt_attribute.attr,
&perf_counter_reset_attribute.attr,
&perf_history_size_attribute.attr,
&perf_time_interval_secs_attribute.attr,
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 06af81f71e10..9b96b99539d6 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -306,7 +306,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
break;
case S_IFDIR:
inode->i_size = PAGE_SIZE;
- orangefs_inode->blksize = (1 << inode->i_blkbits);
+ orangefs_inode->blksize = i_blocksize(inode);
spin_lock(&inode->i_lock);
inode_set_bytes(inode, inode->i_size);
spin_unlock(&inode->i_lock);
@@ -316,7 +316,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
if (new) {
inode->i_size = (loff_t)strlen(new_op->
downcall.resp.getattr.link_target);
- orangefs_inode->blksize = (1 << inode->i_blkbits);
+ orangefs_inode->blksize = i_blocksize(inode);
ret = strscpy(orangefs_inode->link_target,
new_op->downcall.resp.getattr.link_target,
ORANGEFS_NAME_MAX);
diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h
index af0b0e36d559..b8249f8fdd80 100644
--- a/fs/orangefs/upcall.h
+++ b/fs/orangefs/upcall.h
@@ -182,6 +182,7 @@ enum orangefs_param_request_op {
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_SIZE = 26,
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT = 27,
ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE = 28,
+ ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT = 29,
};
struct orangefs_param_request_s {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b73b4de8fb36..1e1e182d571b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -292,101 +292,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
}
} else {
/*
- * Command line (1 string) occupies ARGV and maybe
- * extends into ENVP.
- */
- if (len1 + len2 <= *pos)
- goto skip_argv_envp;
- if (len1 <= *pos)
- goto skip_argv;
-
- p = arg_start + *pos;
- len = len1 - *pos;
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /*
- * Command line can be shorter than whole ARGV
- * even if last "marker" byte says it is not.
- */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
- }
-skip_argv:
- /*
* Command line (1 string) occupies ARGV and
* extends into ENVP.
*/
- if (len1 <= *pos) {
- p = env_start + *pos - len1;
- len = len1 + len2 - *pos;
- } else {
- p = env_start;
- len = len2;
+ struct {
+ unsigned long p;
+ unsigned long len;
+ } cmdline[2] = {
+ { .p = arg_start, .len = len1 },
+ { .p = env_start, .len = len2 },
+ };
+ loff_t pos1 = *pos;
+ unsigned int i;
+
+ i = 0;
+ while (i < 2 && pos1 >= cmdline[i].len) {
+ pos1 -= cmdline[i].len;
+ i++;
}
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /* Find EOS. */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
+ while (i < 2) {
+ p = cmdline[i].p + pos1;
+ len = cmdline[i].len - pos1;
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /*
+ * Command line can be shorter than whole ARGV
+ * even if last "marker" byte says it is not.
+ */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
}
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
+ /* Only first chunk can be read partially. */
+ pos1 = 0;
+ i++;
}
-skip_argv_envp:
- ;
}
out_free_page:
@@ -729,11 +697,11 @@ static int proc_pid_permission(struct inode *inode, int mask)
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, 1);
+ has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == 2) {
+ if (pid->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
@@ -798,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
if (!IS_ERR_OR_NULL(mm)) {
/* ensure this mm_struct can't be freed */
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
/* but do not pin its memory */
mmput(mm);
}
@@ -845,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
return -ENOMEM;
copied = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -953,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
return -ENOMEM;
ret = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
down_read(&mm->mmap_sem);
@@ -1096,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
if (p) {
if (atomic_read(&p->mm->mm_users) > 1) {
mm = p->mm;
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
}
task_unlock(p);
}
@@ -1769,7 +1737,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
stat->gid = GLOBAL_ROOT_GID;
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, 2)) {
+ if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
@@ -3200,7 +3168,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, 2))
+ if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%d", iter.tgid);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f6a01f09f79d..06c73904d497 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
struct rb_node *node = dir->subdir.rb_node;
while (node) {
- struct proc_dir_entry *de = container_of(node,
- struct proc_dir_entry,
- subdir_node);
+ struct proc_dir_entry *de = rb_entry(node,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(len, name, de);
if (result < 0)
@@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
/* Figure out where to put new node */
while (*new) {
- struct proc_dir_entry *this =
- container_of(*new, struct proc_dir_entry, subdir_node);
+ struct proc_dir_entry *this = rb_entry(*new,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(de->namelen, de->name, this);
parent = *new;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ad9ed7958af..2cc7a8030275 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -107,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
- if (pid->hide_pid != 0)
+ if (pid->hide_pid != HIDEPID_OFF)
seq_printf(seq, ",hidepid=%u", pid->hide_pid);
return 0;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
+ if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 1988440b2049..b90da888b81a 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -58,7 +58,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid)
case Opt_hidepid:
if (match_int(&args[0], &option))
return 0;
- if (option < 0 || option > 2) {
+ if (option < HIDEPID_OFF ||
+ option > HIDEPID_INVISIBLE) {
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f96a49178d0..ee3efb229ef6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
@@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end_vaddr;
int ret = 0, copied = 0;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
goto out;
ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 37175621e890..1ef97cfcf422 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5105b1599981..885d445afa0d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pgoff_t index = vmf->pgoff;
struct page *page;
loff_t offset;
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
}
return 0;
fail:
- do_munmap(vma->vm_mm, from, len);
+ do_munmap(vma->vm_mm, from, len, NULL);
return -EAGAIN;
}
@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
return 0;
fail:
- do_munmap(vma->vm_mm, vma->vm_start, len);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
return -EAGAIN;
}
#else
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e18e36..efab7b64925b 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
{
int ret;
- ret = lz4_compress(in, inlen, out, &outlen, workspace);
- if (ret) {
- pr_err("lz4_compress error, ret = %d!\n", ret);
+ ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+ if (!ret) {
+ pr_err("LZ4_compress_default error; compression failed!\n");
return -EIO;
}
- return outlen;
+ return ret;
}
static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
{
int ret;
- ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
- if (ret) {
- pr_err("lz4_decompress error, ret = %d!\n", ret);
+ ret = LZ4_decompress_safe(in, out, inlen, outlen);
+ if (ret < 0) {
+ /*
+ * LZ4_decompress_safe will return an error code
+ * (< 0) if decompression failed
+ */
+ pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
return -EIO;
}
- return outlen;
+ return ret;
}
static void allocate_lz4(void)
{
- big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+ big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
if (big_oops_buf) {
workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2f8c5c9bdaf6..b396eb09f288 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
int ret = 0;
th.t_trans_id = 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (logit) {
reiserfs_write_lock(s);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index cfeae9b0a2b7..a6ab9d64ea1b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -525,7 +525,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
* referenced in convert_tail_for_hole() that may be called from
* reiserfs_get_block()
*/
- bh_result->b_size = (1 << inode->i_blkbits);
+ bh_result->b_size = i_blocksize(inode);
ret = reiserfs_get_block(inode, iblock, bh_result,
create | GET_BLOCK_NO_DANGLE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e314cb30a181..feabcde0290d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s,
if (!strcmp(arg, "auto")) {
/* From JFS code, to auto-get the size. */
*blocks =
- s->s_bdev->bd_inode->i_size >> s->
+ i_size_read(s->s_bdev->bd_inode) >> s->
s_blocksize_bits;
} else {
*blocks = simple_strtoul(arg, &p, 0);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468bd18b0..95da65366548 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct squashfs_lz4 *stream = strm;
void *buff = stream->input, *data;
int avail, i, bytes = length, res;
- size_t dest_len = output->length;
for (i = 0; i < b; i++) {
avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
put_bh(bh[i]);
}
- res = lz4_decompress_unknownoutputsize(stream->input, length,
- stream->output, &dest_len);
- if (res)
+ res = LZ4_decompress_safe(stream->input, stream->output,
+ length, output->length);
+
+ if (res < 0)
return -EIO;
- bytes = dest_len;
+ bytes = res;
data = squashfs_first_page(output);
buff = stream->output;
while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
squashfs_finish_page(output);
- return dest_len;
+ return res;
}
const struct squashfs_decompressor squashfs_lz4_comp_ops = {
diff --git a/fs/stat.c b/fs/stat.c
index a268b7f27adf..3f14d1ef0868 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
- stat->blksize = (1 << inode->i_blkbits);
+ stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b0d783774c96..d9ae86f96df7 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1506,11 +1506,10 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
* mmap()d file has taken write protection fault and is being made writable.
* UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+static int ubifs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec now = ubifs_current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8ec6b3df0bc7..a8d8f71ef8bd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1193,7 +1193,7 @@ int udf_setsize(struct inode *inode, loff_t newsize)
{
int err;
struct udf_inode_info *iinfo;
- int bsize = 1 << inode->i_blkbits;
+ int bsize = i_blocksize(inode);
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 18406158e13f..3c421d06a18e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx {
struct list_head list;
};
+struct userfaultfd_unmap_ctx {
+ struct userfaultfd_ctx *ctx;
+ unsigned long start;
+ unsigned long end;
+ struct list_head list;
+};
+
struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
@@ -681,16 +688,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
userfaultfd_event_wait_completion(ctx, &ewq);
}
-void madvise_userfault_dontneed(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end)
+void userfaultfd_remove(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue ewq;
ctx = vma->vm_userfaultfd_ctx.ctx;
- if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED))
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
return;
userfaultfd_ctx_get(ctx);
@@ -700,15 +707,101 @@ void madvise_userfault_dontneed(struct vm_area_struct *vma,
msg_init(&ewq.msg);
- ewq.msg.event = UFFD_EVENT_MADVDONTNEED;
- ewq.msg.arg.madv_dn.start = start;
- ewq.msg.arg.madv_dn.end = end;
+ ewq.msg.event = UFFD_EVENT_REMOVE;
+ ewq.msg.arg.remove.start = start;
+ ewq.msg.arg.remove.end = end;
userfaultfd_event_wait_completion(ctx, &ewq);
down_read(&mm->mmap_sem);
}
+static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
+ unsigned long start, unsigned long end)
+{
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+
+ list_for_each_entry(unmap_ctx, unmaps, list)
+ if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
+ unmap_ctx->end == end)
+ return true;
+
+ return false;
+}
+
+int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *unmaps)
+{
+ for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+ has_unmap_ctx(ctx, unmaps, start, end))
+ continue;
+
+ unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+ if (!unmap_ctx)
+ return -ENOMEM;
+
+ userfaultfd_ctx_get(ctx);
+ unmap_ctx->ctx = ctx;
+ unmap_ctx->start = start;
+ unmap_ctx->end = end;
+ list_add_tail(&unmap_ctx->list, unmaps);
+ }
+
+ return 0;
+}
+
+void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
+{
+ struct userfaultfd_unmap_ctx *ctx, *n;
+ struct userfaultfd_wait_queue ewq;
+
+ list_for_each_entry_safe(ctx, n, uf, list) {
+ msg_init(&ewq.msg);
+
+ ewq.msg.event = UFFD_EVENT_UNMAP;
+ ewq.msg.arg.remove.start = ctx->start;
+ ewq.msg.arg.remove.end = ctx->end;
+
+ userfaultfd_event_wait_completion(ctx->ctx, &ewq);
+
+ list_del(&ctx->list);
+ kfree(ctx);
+ }
+}
+
+void userfaultfd_exit(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma = mm->mmap;
+
+ /*
+ * We can do the vma walk without locking because the caller
+ * (exit_mm) knows it now has exclusive access
+ */
+ while (vma) {
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+ if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
+ struct userfaultfd_wait_queue ewq;
+
+ userfaultfd_ctx_get(ctx);
+
+ msg_init(&ewq.msg);
+ ewq.msg.event = UFFD_EVENT_EXIT;
+
+ userfaultfd_event_wait_completion(ctx, &ewq);
+
+ ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
+ }
+
+ vma = vma->vm_next;
+ }
+}
+
static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
@@ -1514,6 +1607,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
uffdio_copy.len);
mmput(ctx->mm);
+ } else {
+ return -ENOSPC;
}
if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
return -EFAULT;
@@ -1712,17 +1807,17 @@ static void init_once_userfaultfd_ctx(void *mem)
}
/**
- * userfaultfd_file_create - Creates an userfaultfd file pointer.
+ * userfaultfd_file_create - Creates a userfaultfd file pointer.
* @flags: Flags for the userfaultfd file.
*
- * This function creates an userfaultfd file pointer, w/out installing
+ * This function creates a userfaultfd file pointer, w/out installing
* it into the fd table. This is useful when the userfaultfd file is
* used during the initialization of data structures that require
* extra setup after the userfaultfd creation. So the userfaultfd
* creation is split into the file pointer creation phase, and the
* file descriptor installation phase. In this way races with
* userspace closing the newly installed file descriptor can be
- * avoided. Returns an userfaultfd file pointer, or a proper error
+ * avoided. Returns a userfaultfd file pointer, or a proper error
* pointer.
*/
static struct file *userfaultfd_file_create(int flags)
@@ -1752,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags)
ctx->released = false;
ctx->mm = current->mm;
/* prevent the mm struct to be freed */
- atomic_inc(&ctx->mm->mm_count);
+ mmgrab(ctx->mm);
file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1ff9df7a3ce8..bf65a9ea8642 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -103,9 +103,9 @@ xfs_finish_page_writeback(
unsigned int bsize;
ASSERT(bvec->bv_offset < PAGE_SIZE);
- ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
ASSERT(end < PAGE_SIZE);
- ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
bh = head = page_buffers(bvec->bv_page);
@@ -349,7 +349,7 @@ xfs_map_blocks(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- ssize_t count = 1 << inode->i_blkbits;
+ ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb, end_fsb;
int error = 0;
int bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -758,7 +758,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while ((bh = bh->b_this_page) != head);
@@ -846,7 +846,7 @@ xfs_writepage_map(
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
- ssize_t len = 1 << inode->i_blkbits;
+ ssize_t len = i_blocksize(inode);
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1210,7 +1210,7 @@ xfs_map_trim_size(
offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
- 1 << inode->i_blkbits);
+ i_blocksize(inode));
}
if (mapping_size > LONG_MAX)
mapping_size = LONG_MAX;
@@ -1241,7 +1241,7 @@ xfs_get_blocks(
return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
- ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ ASSERT(bh_result->b_size >= i_blocksize(inode));
size = bh_result->b_size;
if (offset >= i_size_read(inode))
@@ -1389,7 +1389,7 @@ xfs_vm_set_page_dirty(
if (offset < end_offset)
set_buffer_dirty(bh);
bh = bh->b_this_page;
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while (bh != head);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 022014016d80..35703a801372 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -754,7 +754,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
@@ -776,7 +776,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
@@ -1379,22 +1379,21 @@ xfs_file_llseek(
*/
STATIC int
xfs_filemap_page_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_page_mkwrite(XFS_I(inode));
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
} else {
- ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
+ ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
}
@@ -1406,23 +1405,22 @@ xfs_filemap_page_mkwrite(
STATIC int
xfs_filemap_fault(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
int ret;
trace_xfs_filemap_fault(XFS_I(inode));
/* DAX can shortcut the normal fault path on write faults! */
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
- return xfs_filemap_page_mkwrite(vma, vmf);
+ return xfs_filemap_page_mkwrite(vmf);
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode))
- ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
else
- ret = filemap_fault(vma, vmf);
+ ret = filemap_fault(vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
@@ -1431,13 +1429,14 @@ xfs_filemap_fault(
/*
* Similar to xfs_filemap_fault(), the DAX fault path can call into here on
* both read and write faults. Hence we need to handle both cases. There is no
- * ->pmd_mkwrite callout for huge pages, so we have a single function here to
+ * ->huge_mkwrite callout for huge pages, so we have a single function here to
* handle both cases here. @flags carries the information on the type of fault
* occuring.
*/
STATIC int
-xfs_filemap_pmd_fault(
- struct vm_fault *vmf)
+xfs_filemap_huge_fault(
+ struct vm_fault *vmf,
+ enum page_entry_size pe_size)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
@@ -1446,7 +1445,7 @@ xfs_filemap_pmd_fault(
if (!IS_DAX(inode))
return VM_FAULT_FALLBACK;
- trace_xfs_filemap_pmd_fault(ip);
+ trace_xfs_filemap_huge_fault(ip);
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
@@ -1454,7 +1453,7 @@ xfs_filemap_pmd_fault(
}
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (vmf->flags & FAULT_FLAG_WRITE)
@@ -1471,11 +1470,10 @@ xfs_filemap_pmd_fault(
*/
static int
xfs_filemap_pfn_mkwrite(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size;
@@ -1483,7 +1481,7 @@ xfs_filemap_pfn_mkwrite(
trace_xfs_filemap_pfn_mkwrite(ip);
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
/* check if the faulting page hasn't raced with truncate */
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
@@ -1491,7 +1489,7 @@ xfs_filemap_pfn_mkwrite(
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
- ret = dax_pfn_mkwrite(vma, vmf);
+ ret = dax_pfn_mkwrite(vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
@@ -1500,7 +1498,7 @@ xfs_filemap_pfn_mkwrite(
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = xfs_filemap_fault,
- .pmd_fault = xfs_filemap_pmd_fault,
+ .huge_fault = xfs_filemap_huge_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_filemap_page_mkwrite,
.pfn_mkwrite = xfs_filemap_pfn_mkwrite,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index fb7555e73a62..383ac227ce2c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -687,7 +687,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
DEFINE_INODE_EVENT(xfs_filemap_fault);
-DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
+DEFINE_INODE_EVENT(xfs_filemap_huge_fault);
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);