diff options
Diffstat (limited to 'fs')
157 files changed, 2399 insertions, 2147 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6a0f3fa85ef7..3de3b4a89d89 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -534,11 +534,11 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) } static int -v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +v9fs_vm_page_mkwrite(struct vm_fault *vmf) { struct v9fs_inode *v9inode; struct page *page = vmf->page; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 2f088773f1c0..2f8bab390d13 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -138,9 +138,9 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh); extern int affs_remove_header(struct dentry *dentry); extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh); extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh); -extern void secs_to_datestamp(time64_t secs, struct affs_date *ds); -extern umode_t prot_to_mode(u32 prot); -extern void mode_to_prot(struct inode *inode); +extern void affs_secs_to_datestamp(time64_t secs, struct affs_date *ds); +extern umode_t affs_prot_to_mode(u32 prot); +extern void affs_mode_to_prot(struct inode *inode); __printf(3, 4) extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); @@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb); /* namei.c */ +extern const struct export_operations affs_export_ops; extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); @@ -178,7 +179,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, /* inode.c */ -extern unsigned long affs_parent_ino(struct inode *dir); extern struct inode *affs_new_inode(struct inode *dir); extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); extern void affs_evict_inode(struct inode *inode); @@ -213,6 +213,12 @@ extern const struct address_space_operations affs_aops_ofs; extern const struct dentry_operations affs_dentry_operations; extern const struct dentry_operations affs_intl_dentry_operations; +static inline bool affs_validblock(struct super_block *sb, int block) +{ + return(block >= AFFS_SB(sb)->s_reserved && + block < AFFS_SB(sb)->s_partition_size); +} + static inline void affs_set_blocksize(struct super_block *sb, int size) { @@ -222,7 +228,7 @@ static inline struct buffer_head * affs_bread(struct super_block *sb, int block) { pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) + if (affs_validblock(sb, block)) return sb_bread(sb, block); return NULL; } @@ -230,7 +236,7 @@ static inline struct buffer_head * affs_getblk(struct super_block *sb, int block) { pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) + if (affs_validblock(sb, block)) return sb_getblk(sb, block); return NULL; } @@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block) { struct buffer_head *bh; pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { + if (affs_validblock(sb, block)) { bh = sb_getblk(sb, block); lock_buffer(bh); memset(bh->b_data, 0 , sb->s_blocksize); @@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block) { struct buffer_head *bh; pr_debug("%s: %d\n", __func__, block); - if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { + if (affs_validblock(sb, block)) { bh = sb_getblk(sb, block); wait_on_buffer(bh); set_buffer_uptodate(bh); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 0ec65c133b93..b573c3b9a328 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh) } void -secs_to_datestamp(time64_t secs, struct affs_date *ds) +affs_secs_to_datestamp(time64_t secs, struct affs_date *ds) { u32 days; u32 minute; @@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds) } umode_t -prot_to_mode(u32 prot) +affs_prot_to_mode(u32 prot) { umode_t mode = 0; if (!(prot & FIBF_NOWRITE)) - mode |= S_IWUSR; + mode |= 0200; if (!(prot & FIBF_NOREAD)) - mode |= S_IRUSR; + mode |= 0400; if (!(prot & FIBF_NOEXECUTE)) - mode |= S_IXUSR; + mode |= 0100; if (prot & FIBF_GRP_WRITE) - mode |= S_IWGRP; + mode |= 0020; if (prot & FIBF_GRP_READ) - mode |= S_IRGRP; + mode |= 0040; if (prot & FIBF_GRP_EXECUTE) - mode |= S_IXGRP; + mode |= 0010; if (prot & FIBF_OTR_WRITE) - mode |= S_IWOTH; + mode |= 0002; if (prot & FIBF_OTR_READ) - mode |= S_IROTH; + mode |= 0004; if (prot & FIBF_OTR_EXECUTE) - mode |= S_IXOTH; + mode |= 0001; return mode; } void -mode_to_prot(struct inode *inode) +affs_mode_to_prot(struct inode *inode) { u32 prot = AFFS_I(inode)->i_protect; umode_t mode = inode->i_mode; - if (!(mode & S_IXUSR)) + if (!(mode & 0100)) prot |= FIBF_NOEXECUTE; - if (!(mode & S_IRUSR)) + if (!(mode & 0400)) prot |= FIBF_NOREAD; - if (!(mode & S_IWUSR)) + if (!(mode & 0200)) prot |= FIBF_NOWRITE; - if (mode & S_IXGRP) + if (mode & 0010) prot |= FIBF_GRP_EXECUTE; - if (mode & S_IRGRP) + if (mode & 0040) prot |= FIBF_GRP_READ; - if (mode & S_IWGRP) + if (mode & 0020) prot |= FIBF_GRP_WRITE; - if (mode & S_IXOTH) + if (mode & 0001) prot |= FIBF_OTR_EXECUTE; - if (mode & S_IROTH) + if (mode & 0004) prot |= FIBF_OTR_READ; - if (mode & S_IWOTH) + if (mode & 0002) prot |= FIBF_OTR_WRITE; AFFS_I(inode)->i_protect = prot; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index fe4e1290dbb5..a5e6097eb5a9 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) if (affs_test_opt(sbi->s_flags, SF_SETMODE)) inode->i_mode = sbi->s_mode; else - inode->i_mode = prot_to_mode(prot); + inode->i_mode = affs_prot_to_mode(prot); id = be16_to_cpu(tail->uid); if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID)) @@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) } tail = AFFS_TAIL(sb, bh); if (tail->stype == cpu_to_be32(ST_ROOT)) { - secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change); + affs_secs_to_datestamp(inode->i_mtime.tv_sec, + &AFFS_ROOT_TAIL(sb, bh)->root_change); } else { tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect); tail->size = cpu_to_be32(inode->i_size); - secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); + affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change); if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { uid = i_uid_read(inode); gid = i_gid_read(inode); @@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) mark_inode_dirty(inode); if (attr->ia_valid & ATTR_MODE) - mode_to_prot(inode); + affs_mode_to_prot(inode); out: return error; } diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 29186d29a3b6..96dd1d09a273 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -9,29 +9,10 @@ */ #include "affs.h" +#include <linux/exportfs.h> typedef int (*toupper_t)(int); -static int affs_toupper(int ch); -static int affs_hash_dentry(const struct dentry *, struct qstr *); -static int affs_compare_dentry(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); -static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(const struct dentry *, struct qstr *); -static int affs_intl_compare_dentry(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); - -const struct dentry_operations affs_dentry_operations = { - .d_hash = affs_hash_dentry, - .d_compare = affs_compare_dentry, -}; - -const struct dentry_operations affs_intl_dentry_operations = { - .d_hash = affs_intl_hash_dentry, - .d_compare = affs_intl_compare_dentry, -}; - - /* Simple toupper() for DOS\1 */ static int @@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) return -ENOSPC; inode->i_mode = mode; - mode_to_prot(inode); + affs_mode_to_prot(inode); mark_inode_dirty(inode); inode->i_op = &affs_file_inode_operations; @@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return -ENOSPC; inode->i_mode = S_IFDIR | mode; - mode_to_prot(inode); + affs_mode_to_prot(inode); inode->i_op = &affs_dir_inode_operations; inode->i_fop = &affs_dir_operations; @@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) inode_nohighmem(inode); inode->i_data.a_ops = &affs_symlink_aops; inode->i_mode = S_IFLNK | 0777; - mode_to_prot(inode); + affs_mode_to_prot(inode); error = -EIO; bh = affs_bread(sb, inode->i_ino); @@ -465,3 +446,71 @@ done: affs_brelse(bh); return retval; } + +static struct dentry *affs_get_parent(struct dentry *child) +{ + struct inode *parent; + struct buffer_head *bh; + + bh = affs_bread(child->d_sb, d_inode(child)->i_ino); + if (!bh) + return ERR_PTR(-EIO); + + parent = affs_iget(child->d_sb, + be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent)); + brelse(bh); + if (IS_ERR(parent)) + return ERR_CAST(parent); + + return d_obtain_alias(parent); +} + +static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino, + u32 generation) +{ + struct inode *inode; + + if (!affs_validblock(sb, ino)) + return ERR_PTR(-ESTALE); + + inode = affs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + +static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + affs_nfs_get_inode); +} + +static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + affs_nfs_get_inode); +} + +const struct export_operations affs_export_ops = { + .fh_to_dentry = affs_fh_to_dentry, + .fh_to_parent = affs_fh_to_parent, + .get_parent = affs_get_parent, +}; + +const struct dentry_operations affs_dentry_operations = { + .d_hash = affs_hash_dentry, + .d_compare = affs_compare_dentry, +}; + +const struct dentry_operations affs_intl_dentry_operations = { + .d_hash = affs_intl_hash_dentry, + .d_compare = affs_intl_compare_dentry, +}; diff --git a/fs/affs/super.c b/fs/affs/super.c index d6384863192c..37532538e8ab 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait) struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); lock_buffer(bh); - secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change); + affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); unlock_buffer(bh); @@ -507,6 +507,7 @@ got_root: return -ENOMEM; } + sb->s_export_op = &affs_export_ops; pr_debug("s_flags=%lX\n", sb->s_flags); return 0; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 51a241e09fbb..949f960337f5 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -252,7 +252,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, /* skip entries marked unused in the bitmap */ if (!(block->pagehdr.bitmap[offset / 8] & (1 << (offset % 8)))) { - _debug("ENT[%Zu.%u]: unused", + _debug("ENT[%zu.%u]: unused", blkoff / sizeof(union afs_dir_block), offset); if (offset >= curr) ctx->pos = blkoff + @@ -266,7 +266,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, sizeof(*block) - offset * sizeof(union afs_dirent)); - _debug("ENT[%Zu.%u]: %s %Zu \"%s\"", + _debug("ENT[%zu.%u]: %s %zu \"%s\"", blkoff / sizeof(union afs_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); @@ -274,23 +274,23 @@ static int afs_dir_iterate_block(struct dir_context *ctx, /* work out where the next possible entry is */ for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) { if (next >= AFS_DIRENT_PER_BLOCK) { - _debug("ENT[%Zu.%u]:" + _debug("ENT[%zu.%u]:" " %u travelled beyond end dir block" - " (len %u/%Zu)", + " (len %u/%zu)", blkoff / sizeof(union afs_dir_block), offset, next, tmp, nlen); return -EIO; } if (!(block->pagehdr.bitmap[next / 8] & (1 << (next % 8)))) { - _debug("ENT[%Zu.%u]:" - " %u unmarked extension (len %u/%Zu)", + _debug("ENT[%zu.%u]:" + " %u unmarked extension (len %u/%zu)", blkoff / sizeof(union afs_dir_block), offset, next, tmp, nlen); return -EIO; } - _debug("ENT[%Zu.%u]: ext %u/%Zu", + _debug("ENT[%zu.%u]: ext %u/%zu", blkoff / sizeof(union afs_dir_block), next, tmp, nlen); next++; @@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx) ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED, 0, &unused); + MAP_SHARED, 0, &unused, NULL); up_write(&mm->mmap_sem); if (IS_ERR((void *)ctx->mmap_base)) { ctx->mmap_size = 0; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 6f48d670c941..806df746f1a9 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -38,8 +38,6 @@ * which have been left busy at at service shutdown. */ -#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) - typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, struct autofs_dev_ioctl *); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 82e8f6edfb48..d79ced925861 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs4_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); + ino->last_used = jiffies; } - ino->last_used = jiffies; return status; } @@ -321,16 +321,21 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; - struct autofs_info *ino; struct dentry *new; new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; - ino = autofs4_dentry_ino(new); - ino->last_used = jiffies; - dput(path->dentry); - path->dentry = new; + if (new == dentry) + dput(new); + else { + struct autofs_info *ino; + + ino = autofs4_dentry_ino(new); + ino->last_used = jiffies; + dput(path->dentry); + path->dentry = new; + } } return path->dentry; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 73031ec54a7b..77c30f15a02c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -989,7 +989,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_bdi = &noop_backing_dev_info; - bdev->bd_block_size = (1 << inode->i_blkbits); + bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; inode->i_mode = S_IFBLK; @@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode) spin_lock(&bdev_lock); bdev = inode->i_bdev; - if (bdev) { + if (bdev && !inode_unhashed(bdev->bd_inode)) { bdgrab(bdev); spin_unlock(&bdev_lock); return bdev; } spin_unlock(&bdev_lock); + /* + * i_bdev references block device inode that was already shut down + * (corresponding device got removed). Remove the reference and look + * up block device inode again just in case new device got + * reestablished under the same device number. + */ + if (bdev) + bd_forget(inode); + bdev = bdget(inode->i_rdev); if (bdev) { spin_lock(&bdev_lock); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8299601a3549..7699e16784d3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -956,8 +956,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, /* * add all inline backrefs for bytenr to the list */ -static int __add_inline_refs(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u64 bytenr, +static int __add_inline_refs(struct btrfs_path *path, u64 bytenr, int *info_level, struct list_head *prefs, struct ref_root *ref_tree, u64 *total_refs, u64 inum) @@ -1284,7 +1283,7 @@ again: */ delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - head = btrfs_find_delayed_ref_head(trans, bytenr); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { atomic_inc(&head->node.refs); @@ -1354,7 +1353,7 @@ again: if (key.objectid == bytenr && (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY)) { - ret = __add_inline_refs(fs_info, path, bytenr, + ret = __add_inline_refs(path, bytenr, &info_level, &prefs, ref_tree, &total_refs, inum); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 1a8fa46ff87e..819a6d27218a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -224,16 +224,16 @@ static inline void btrfs_insert_inode_hash(struct inode *inode) __insert_inode_hash(inode, h); } -static inline u64 btrfs_ino(struct inode *inode) +static inline u64 btrfs_ino(struct btrfs_inode *inode) { - u64 ino = BTRFS_I(inode)->location.objectid; + u64 ino = inode->location.objectid; /* * !ino: btree_inode * type == BTRFS_ROOT_ITEM_KEY: subvol dir */ - if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY) - ino = inode->i_ino; + if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY) + ino = inode->vfs_inode.i_ino; return ino; } @@ -248,23 +248,21 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (root == root->fs_info->tree_root && - btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) + btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID) return true; if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) return true; return false; } -static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) +static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { int ret = 0; - spin_lock(&BTRFS_I(inode)->lock); - if (BTRFS_I(inode)->logged_trans == generation && - BTRFS_I(inode)->last_sub_trans <= - BTRFS_I(inode)->last_log_commit && - BTRFS_I(inode)->last_sub_trans <= - BTRFS_I(inode)->root->last_log_commit) { + spin_lock(&inode->lock); + if (inode->logged_trans == generation && + inode->last_sub_trans <= inode->last_log_commit && + inode->last_sub_trans <= inode->root->last_log_commit) { /* * After a ranged fsync we might have left some extent maps * (that fall outside the fsync's range). So return false @@ -272,10 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) * will be called and process those extent maps. */ smp_mb(); - if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents)) + if (list_empty(&inode->extent_tree.modified_extents)) ret = 1; } - spin_unlock(&BTRFS_I(inode)->lock); + spin_unlock(&inode->lock); return ret; } @@ -326,6 +324,24 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) &BTRFS_I(inode)->runtime_flags); } +static inline void btrfs_print_data_csum_error(struct inode *inode, + u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + /* Output minus objectid, which is more meaningful */ + if (root->objectid >= BTRFS_LAST_FREE_OBJECTID) + btrfs_warn_rl(root->fs_info, + "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", + root->objectid, btrfs_ino(BTRFS_I(inode)), + logical_start, csum, csum_expected, mirror_num); + else + btrfs_warn_rl(root->fs_info, + "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", + root->objectid, btrfs_ino(BTRFS_I(inode)), + logical_start, csum, csum_expected, mirror_num); +} + bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end); #endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c4444d6f439f..903c32c9eb22 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -124,10 +124,8 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr); if (csum != *cb_sum) { - btrfs_info(BTRFS_I(inode)->root->fs_info, - "csum failed ino %llu extent %llu csum %u wanted %u mirror %d", - btrfs_ino(inode), disk_start, csum, *cb_sum, - cb->mirror_num); + btrfs_print_data_csum_error(inode, disk_start, csum, + *cb_sum, cb->mirror_num); ret = -EIO; goto fail; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a426dc822d4d..1192bc7d2ee7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -28,9 +28,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend); +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *ins_key, struct btrfs_path *path, + int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct extent_buffer *dst, @@ -426,7 +426,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); - tm = container_of(node, struct tree_mod_elem, node); + tm = rb_entry(node, struct tree_mod_elem, node); if (tm->seq > min_seq) continue; rb_erase(node, tm_root); @@ -460,7 +460,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; while (*new) { - cur = container_of(*new, struct tree_mod_elem, node); + cur = rb_entry(*new, struct tree_mod_elem, node); parent = *new; if (cur->logical < tm->logical) new = &((*new)->rb_left); @@ -746,7 +746,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { - cur = container_of(node, struct tree_mod_elem, node); + cur = rb_entry(node, struct tree_mod_elem, node); if (cur->logical < start) { node = node->rb_left; } else if (cur->logical > start) { @@ -1074,7 +1074,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1); BUG_ON(ret); /* -ENOMEM */ } - clean_tree_block(trans, fs_info, buf); + clean_tree_block(fs_info, buf); *last_ref = 1; } return 0; @@ -1326,7 +1326,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, next = rb_next(&tm->node); if (!next) break; - tm = container_of(next, struct tree_mod_elem, node); + tm = rb_entry(next, struct tree_mod_elem, node); if (tm->logical != first_tm->logical) break; } @@ -1580,7 +1580,8 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) /* * compare two keys in a memcmp fashion */ -static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +static int comp_keys(const struct btrfs_disk_key *disk, + const struct btrfs_key *k2) { struct btrfs_key k1; @@ -1592,7 +1593,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) /* * same as comp_keys only with two btrfs_key's */ -int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) +int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) { if (k1->objectid > k2->objectid) return 1; @@ -1732,8 +1733,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * slot may point to max if the key is bigger than all of the keys */ static noinline int generic_bin_search(struct extent_buffer *eb, - unsigned long p, - int item_size, struct btrfs_key *key, + unsigned long p, int item_size, + const struct btrfs_key *key, int max, int *slot) { int low = 0; @@ -1802,7 +1803,7 @@ static noinline int generic_bin_search(struct extent_buffer *eb, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, +static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int level, int *slot) { if (level == 0) @@ -1819,7 +1820,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, slot); } -int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, +int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int level, int *slot) { return bin_search(eb, key, level, slot); @@ -1937,7 +1938,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, path->locks[level] = 0; path->nodes[level] = NULL; - clean_tree_block(trans, fs_info, mid); + clean_tree_block(fs_info, mid); btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); @@ -1998,7 +1999,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - clean_tree_block(trans, fs_info, right); + clean_tree_block(fs_info, right); btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); @@ -2042,7 +2043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { - clean_tree_block(trans, fs_info, mid); + clean_tree_block(fs_info, mid); btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); @@ -2437,10 +2438,9 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) * reada. -EAGAIN is returned and the search must be repeated. */ static int -read_block_for_search(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *p, - struct extent_buffer **eb_ret, int level, int slot, - struct btrfs_key *key, u64 time_seq) +read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, + struct extent_buffer **eb_ret, int level, int slot, + const struct btrfs_key *key) { struct btrfs_fs_info *fs_info = root->fs_info; u64 blocknr; @@ -2587,7 +2587,7 @@ done: } static void key_search_validate(struct extent_buffer *b, - struct btrfs_key *key, + const struct btrfs_key *key, int level) { #ifdef CONFIG_BTRFS_ASSERT @@ -2606,7 +2606,7 @@ static void key_search_validate(struct extent_buffer *b, #endif } -static int key_search(struct extent_buffer *b, struct btrfs_key *key, +static int key_search(struct extent_buffer *b, const struct btrfs_key *key, int level, int *prev_cmp, int *slot) { if (*prev_cmp != 0) { @@ -2668,9 +2668,9 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *key, struct btrfs_path *p, int - ins_len, int cow) +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *key, struct btrfs_path *p, + int ins_len, int cow) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; @@ -2870,8 +2870,8 @@ cow_done: goto done; } - err = read_block_for_search(trans, root, p, - &b, level, slot, key, 0); + err = read_block_for_search(root, p, &b, level, + slot, key); if (err == -EAGAIN) goto again; if (err) { @@ -2953,7 +2953,7 @@ done: * The resulting path and return value will be set up as if we called * btrfs_search_slot at that point in time with ins_len and cow both set to 0. */ -int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, +int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, u64 time_seq) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -3014,8 +3014,8 @@ again: goto done; } - err = read_block_for_search(NULL, root, p, &b, level, - slot, key, time_seq); + err = read_block_for_search(root, p, &b, level, + slot, key); if (err == -EAGAIN) goto again; if (err) { @@ -3067,8 +3067,9 @@ done: * < 0 on error */ int btrfs_search_slot_for_read(struct btrfs_root *root, - struct btrfs_key *key, struct btrfs_path *p, - int find_higher, int return_any) + const struct btrfs_key *key, + struct btrfs_path *p, int find_higher, + int return_any) { int ret; struct extent_buffer *leaf; @@ -3166,7 +3167,7 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info, */ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - struct btrfs_key *new_key) + const struct btrfs_key *new_key) { struct btrfs_disk_key disk_key; struct extent_buffer *eb; @@ -3594,8 +3595,7 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, * min slot controls the lowest index we're willing to push to the * right. We'll push up to and including min_slot, but no lower */ -static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, +static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, @@ -3704,7 +3704,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (left_nritems) btrfs_mark_buffer_dirty(left); else - clean_tree_block(trans, fs_info, left); + clean_tree_block(fs_info, left); btrfs_mark_buffer_dirty(right); @@ -3716,7 +3716,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; if (btrfs_header_nritems(path->nodes[0]) == 0) - clean_tree_block(trans, fs_info, path->nodes[0]); + clean_tree_block(fs_info, path->nodes[0]); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3809,7 +3809,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } - return __push_leaf_right(trans, fs_info, path, min_data_size, empty, + return __push_leaf_right(fs_info, path, min_data_size, empty, right, free_space, left_nritems, min_slot); out_unlock: btrfs_tree_unlock(right); @@ -3825,8 +3825,7 @@ out_unlock: * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the * items */ -static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, +static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int data_size, int empty, struct extent_buffer *left, int free_space, u32 right_nritems, @@ -3945,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, if (right_nritems) btrfs_mark_buffer_dirty(right); else - clean_tree_block(trans, fs_info, right); + clean_tree_block(fs_info, right); btrfs_item_key(right, &disk_key, 0); fixup_low_keys(fs_info, path, &disk_key, 1); @@ -4035,7 +4034,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - return __push_leaf_left(trans, fs_info, path, min_data_size, + return __push_leaf_left(fs_info, path, min_data_size, empty, left, free_space, right_nritems, max_slot); out: @@ -4180,7 +4179,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, */ static noinline int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_key *ins_key, + const struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend) { @@ -4412,10 +4411,9 @@ err: return ret; } -static noinline int split_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, +static noinline int split_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - struct btrfs_key *new_key, + const struct btrfs_key *new_key, unsigned long split_offset) { struct extent_buffer *leaf; @@ -4501,7 +4499,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans, int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *new_key, + const struct btrfs_key *new_key, unsigned long split_offset) { int ret; @@ -4510,7 +4508,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = split_item(trans, root->fs_info, path, new_key, split_offset); + ret = split_item(root->fs_info, path, new_key, split_offset); return ret; } @@ -4525,7 +4523,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, int btrfs_duplicate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *new_key) + const struct btrfs_key *new_key) { struct extent_buffer *leaf; int ret; @@ -4726,7 +4724,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, * that doesn't call btrfs_search_slot */ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, + const struct btrfs_key *cpu_key, u32 *data_size, u32 total_data, u32 total_size, int nr) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4820,7 +4818,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, + const struct btrfs_key *cpu_key, u32 *data_size, int nr) { int ret = 0; @@ -4851,9 +4849,9 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *cpu_key, void *data, u32 - data_size) +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *cpu_key, void *data, + u32 data_size) { int ret = 0; struct btrfs_path *path; @@ -5008,7 +5006,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(leaf, 0); } else { btrfs_set_path_blocking(path); - clean_tree_block(trans, fs_info, leaf); + clean_tree_block(fs_info, leaf); btrfs_del_leaf(trans, root, path, leaf); } } else { @@ -5243,7 +5241,7 @@ out: static int tree_move_down(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - int *level, int root_level) + int *level) { struct extent_buffer *eb; @@ -5258,8 +5256,7 @@ static int tree_move_down(struct btrfs_fs_info *fs_info, return 0; } -static int tree_move_next_or_upnext(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static int tree_move_next_or_upnext(struct btrfs_path *path, int *level, int root_level) { int ret = 0; @@ -5298,10 +5295,9 @@ static int tree_advance(struct btrfs_fs_info *fs_info, int ret; if (*level == 0 || !allow_down) { - ret = tree_move_next_or_upnext(fs_info, path, level, - root_level); + ret = tree_move_next_or_upnext(path, level, root_level); } else { - ret = tree_move_down(fs_info, path, level, root_level); + ret = tree_move_down(fs_info, path, level); } if (ret >= 0) { if (*level == 0) @@ -5784,8 +5780,8 @@ again: next = c; next_rw_lock = path->locks[level]; - ret = read_block_for_search(NULL, root, path, &next, level, - slot, &key, 0); + ret = read_block_for_search(root, path, &next, level, + slot, &key); if (ret == -EAGAIN) goto again; @@ -5834,8 +5830,8 @@ again: if (!level) break; - ret = read_block_for_search(NULL, root, path, &next, level, - 0, &key, 0); + ret = read_block_for_search(root, path, &next, level, + 0, &key); if (ret == -EAGAIN) goto again; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6a823719b6c5..105d4d43993e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -97,6 +97,14 @@ static const int btrfs_csum_sizes[] = { 4 }; #define BTRFS_MAX_EXTENT_SIZE SZ_128M +/* + * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size + */ +static inline u32 count_max_extents(u64 size) +{ + return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); +} + struct btrfs_mapping_tree { struct extent_map_tree map_tree; }; @@ -1953,7 +1961,7 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, - struct btrfs_disk_key *disk) + const struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); cpu->type = disk->type; @@ -1961,7 +1969,7 @@ static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, } static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, - struct btrfs_key *cpu) + const struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); disk->type = cpu->type; @@ -1993,8 +2001,7 @@ static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, btrfs_disk_key_to_cpu(key, &disk_key); } - -static inline u8 btrfs_key_type(struct btrfs_key *key) +static inline u8 btrfs_key_type(const struct btrfs_key *key) { return key->type; } @@ -2577,8 +2584,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes); int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, struct extent_buffer *eb); -int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, u64 bytenr); struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, @@ -2587,10 +2593,11 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 parent, - u64 root_objectid, - struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size); + struct btrfs_root *root, + u64 parent, u64 root_objectid, + const struct btrfs_disk_key *key, + int level, u64 hint, + u64 empty_size); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -2623,8 +2630,7 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); -void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); +void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -2696,8 +2702,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, int nitems, u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv, - u64 qgroup_reserved); + struct btrfs_block_rsv *rsv); int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len); @@ -2724,7 +2729,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_inc_block_group_ro(struct btrfs_root *root, +int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache); void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); @@ -2750,9 +2755,9 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group, struct btrfs_fs_info *info, u64 start, u64 end); /* ctree.c */ -int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, +int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int level, int *slot); -int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2); +int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2); int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); @@ -2760,7 +2765,7 @@ int btrfs_previous_extent_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid); void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - struct btrfs_key *new_key); + const struct btrfs_key *new_key); struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, @@ -2802,22 +2807,23 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info, int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *new_key, + const struct btrfs_key *new_key, unsigned long split_offset); int btrfs_duplicate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *new_key); + const struct btrfs_key *new_key); int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key); -int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *key, struct btrfs_path *p, int - ins_len, int cow); -int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *key, struct btrfs_path *p, + int ins_len, int cow); +int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, u64 time_seq); int btrfs_search_slot_for_read(struct btrfs_root *root, - struct btrfs_key *key, struct btrfs_path *p, - int find_higher, int return_any); + const struct btrfs_key *key, + struct btrfs_path *p, int find_higher, + int return_any); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, u64 *last_ret, @@ -2840,19 +2846,20 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, } void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, + const struct btrfs_key *cpu_key, u32 *data_size, u32 total_data, u32 total_size, int nr); -int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *key, void *data, u32 data_size); +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *key, void *data, u32 data_size); int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, int nr); + const struct btrfs_key *cpu_key, u32 *data_size, + int nr); static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, + const struct btrfs_key *key, u32 data_size) { return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); @@ -2941,15 +2948,15 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, const char *name, int name_len); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_key *key); -int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *key, struct btrfs_root_item - *item); + const struct btrfs_key *key); +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const struct btrfs_key *key, + struct btrfs_root_item *item); int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); -int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, +int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key, struct btrfs_path *path, struct btrfs_root_item *root_item, struct btrfs_key *root_key); int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info); @@ -3119,7 +3126,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len); int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, @@ -3147,7 +3154,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +int btrfs_page_mkwrite(struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); @@ -3447,7 +3454,8 @@ do { \ "BTRFS: Transaction aborted (error %d)\n", \ (errno)); \ } else { \ - pr_debug("BTRFS: Transaction aborted (error %d)\n", \ + btrfs_debug((trans)->fs_info, \ + "Transaction aborted (error %d)", \ (errno)); \ } \ } \ diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 80982a83c9fd..f7a6ee5ccc80 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -72,14 +72,14 @@ static inline int btrfs_is_continuous_delayed_item( return 0; } -static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) +static struct btrfs_delayed_node *btrfs_get_delayed_node( + struct btrfs_inode *btrfs_inode) { - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); struct btrfs_root *root = btrfs_inode->root; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(btrfs_inode); struct btrfs_delayed_node *node; - node = ACCESS_ONCE(btrfs_inode->delayed_node); + node = READ_ONCE(btrfs_inode->delayed_node); if (node) { atomic_inc(&node->refs); return node; @@ -107,16 +107,15 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) /* Will return either the node or PTR_ERR(-ENOMEM) */ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( - struct inode *inode) + struct btrfs_inode *btrfs_inode) { struct btrfs_delayed_node *node; - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); struct btrfs_root *root = btrfs_inode->root; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(btrfs_inode); int ret; again: - node = btrfs_get_delayed_node(inode); + node = btrfs_get_delayed_node(btrfs_inode); if (node) return node; @@ -574,7 +573,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info, static int btrfs_delayed_inode_reserve_metadata( struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_delayed_node *node) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -603,13 +602,13 @@ static int btrfs_delayed_inode_reserve_metadata( * worth which is less likely to hurt us. */ if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) release = true; else src_rsv = NULL; - spin_unlock(&BTRFS_I(inode)->lock); + spin_unlock(&inode->lock); } /* @@ -1196,7 +1195,7 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, } int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); struct btrfs_path *path; @@ -1233,9 +1232,9 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, return ret; } -int btrfs_commit_inode_delayed_inode(struct inode *inode) +int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_trans_handle *trans; struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); struct btrfs_path *path; @@ -1288,15 +1287,15 @@ out: return ret; } -void btrfs_remove_delayed_node(struct inode *inode) +void btrfs_remove_delayed_node(struct btrfs_inode *inode) { struct btrfs_delayed_node *delayed_node; - delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node); + delayed_node = READ_ONCE(inode->delayed_node); if (!delayed_node) return; - BTRFS_I(inode)->delayed_node = NULL; + inode->delayed_node = NULL; btrfs_release_delayed_node(delayed_node); } @@ -1434,7 +1433,7 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, const char *name, int name_len, - struct inode *dir, + struct btrfs_inode *dir, struct btrfs_disk_key *disk_key, u8 type, u64 index) { @@ -1510,7 +1509,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info, int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, - struct inode *dir, u64 index) + struct btrfs_inode *dir, u64 index) { struct btrfs_delayed_node *node; struct btrfs_delayed_item *item; @@ -1558,7 +1557,7 @@ end: return ret; } -int btrfs_inode_delayed_dir_index_count(struct inode *inode) +int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) { struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); @@ -1575,7 +1574,7 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) return -EINVAL; } - BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; + inode->index_cnt = delayed_node->index_cnt; btrfs_release_delayed_node(delayed_node); return 0; } @@ -1587,7 +1586,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_item *item; - delayed_node = btrfs_get_delayed_node(inode); + delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); if (!delayed_node) return false; @@ -1776,7 +1775,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) struct btrfs_delayed_node *delayed_node; struct btrfs_inode_item *inode_item; - delayed_node = btrfs_get_delayed_node(inode); + delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); if (!delayed_node) return -ENOENT; @@ -1831,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *delayed_node; int ret = 0; - delayed_node = btrfs_get_or_create_delayed_node(inode); + delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode)); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); @@ -1841,7 +1840,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, goto release_node; } - ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode, + ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode), delayed_node); if (ret) goto release_node; @@ -1856,9 +1855,9 @@ release_node: return ret; } -int btrfs_delayed_delete_inode_ref(struct inode *inode) +int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_delayed_node *delayed_node; /* @@ -1933,7 +1932,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node) mutex_unlock(&delayed_node->mutex); } -void btrfs_kill_delayed_inode_items(struct inode *inode) +void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) { struct btrfs_delayed_node *delayed_node; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8a2bf5e3e4cf..40327cc3b99a 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -101,15 +101,15 @@ static inline void btrfs_init_delayed_root( int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, const char *name, int name_len, - struct inode *dir, + struct btrfs_inode *dir, struct btrfs_disk_key *disk_key, u8 type, u64 index); int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, - struct inode *dir, u64 index); + struct btrfs_inode *dir, u64 index); -int btrfs_inode_delayed_dir_index_count(struct inode *inode); +int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode); int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); @@ -119,17 +119,17 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info); int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, - struct inode *inode); + struct btrfs_inode *inode); /* Used for evicting the inode. */ -void btrfs_remove_delayed_node(struct inode *inode); -void btrfs_kill_delayed_inode_items(struct inode *inode); -int btrfs_commit_inode_delayed_inode(struct inode *inode); +void btrfs_remove_delayed_node(struct btrfs_inode *inode); +void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode); +int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); int btrfs_fill_inode(struct inode *inode, u32 *rdev); -int btrfs_delayed_delete_inode_ref(struct inode *inode); +int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ef724a5fc30e..6eb80952efb3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data) + int action, int is_data, int *qrecord_inserted_ret) { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; int count_mod = 1; int must_insert_reserved = 0; + int qrecord_inserted = 0; /* If reserved is provided, it must be a data extent. */ BUG_ON(!is_data && reserved); @@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, if(btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord)) kfree(qrecord); + else + qrecord_inserted = 1; } spin_lock_init(&head_ref->lock); @@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, atomic_inc(&delayed_refs->num_entries); trans->delayed_ref_updates++; } + if (qrecord_inserted_ret) + *qrecord_inserted_ret = qrecord_inserted; return head_ref; } @@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; + int qrecord_inserted; BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * the spin lock */ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, - bytenr, num_bytes, 0, 0, action, 0); + bytenr, num_bytes, 0, 0, action, 0, + &qrecord_inserted); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); spin_unlock(&delayed_refs->lock); + if (qrecord_inserted) + return btrfs_qgroup_trace_extent_post(fs_info, record); return 0; free_head_ref: @@ -829,15 +838,14 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, - u64 owner, u64 offset, u64 reserved, int action, - struct btrfs_delayed_extent_op *extent_op) + u64 owner, u64 offset, u64 reserved, int action) { struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; + int qrecord_inserted; - BUG_ON(extent_op && !extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); if (!ref) return -ENOMEM; @@ -859,7 +867,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, } } - head_ref->extent_op = extent_op; + head_ref->extent_op = NULL; delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); @@ -870,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, bytenr, num_bytes, ref_root, reserved, - action, 1); + action, 1, &qrecord_inserted); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action); spin_unlock(&delayed_refs->lock); + if (qrecord_inserted) + return btrfs_qgroup_trace_extent_post(fs_info, record); return 0; } @@ -899,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data); + extent_op->is_data, NULL); spin_unlock(&delayed_refs->lock); return 0; @@ -911,11 +921,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, * the head node if any where found, or NULL if not. */ struct btrfs_delayed_ref_head * -btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) +btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr) { - struct btrfs_delayed_ref_root *delayed_refs; - - delayed_refs = &trans->transaction->delayed_refs; return find_ref_head(&delayed_refs->href_root, bytenr, 0); } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 50947b5a9152..0e537f98f1a1 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -250,8 +250,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, - u64 owner, u64 offset, u64 reserved, int action, - struct btrfs_delayed_extent_op *extent_op); + u64 owner, u64 offset, u64 reserved, int action); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, @@ -262,7 +261,8 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * -btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); +btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + u64 bytenr); int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *head); static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b039fe0c751a..724504a2d7ac 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; u32 data_size; - key.objectid = btrfs_ino(dir); + key.objectid = btrfs_ino(BTRFS_I(dir)); key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); @@ -174,8 +174,7 @@ second_insert: btrfs_release_path(path); ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, - name_len, dir, &disk_key, type, - index); + name_len, BTRFS_I(dir), &disk_key, type, index); out_free: btrfs_free_path(path); if (ret) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 37a31b12bb0c..207db0270b15 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -64,8 +64,7 @@ static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, - int read_only); +static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info); @@ -1005,7 +1004,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, return ret; } -static int check_async_write(struct inode *inode, unsigned long bio_flags) +static int check_async_write(unsigned long bio_flags) { if (bio_flags & EXTENT_BIO_TREE_LOG) return 0; @@ -1021,7 +1020,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int async = check_async_write(inode, bio_flags); + int async = check_async_write(bio_flags); int ret; if (bio_op(bio) != REQ_OP_WRITE) { @@ -1248,8 +1247,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, } -void clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, +void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf) { if (btrfs_header_generation(buf) == @@ -2802,7 +2800,7 @@ int open_ctree(struct super_block *sb, memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); - ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + ret = btrfs_check_super_valid(fs_info); if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); err = -EINVAL; @@ -3411,7 +3409,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) */ static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, - int do_barriers, int wait, int max_mirrors) + int wait, int max_mirrors) { struct buffer_head *bh; int i; @@ -3696,7 +3694,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( return num_tolerated_disk_barrier_failures; } -static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) +int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) { struct list_head *head; struct btrfs_device *dev; @@ -3753,7 +3751,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors); + ret = write_dev_supers(dev, sb, 0, max_mirrors); if (ret) total_errors++; } @@ -3776,7 +3774,7 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) if (!dev->in_fs_metadata || !dev->writeable) continue; - ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors); + ret = write_dev_supers(dev, sb, 1, max_mirrors); if (ret) total_errors++; } @@ -3790,12 +3788,6 @@ static int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) return 0; } -int write_ctree_super(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, int max_mirrors) -{ - return write_all_supers(fs_info, max_mirrors); -} - /* Drop a fs root from the radix tree and free it. */ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -4122,8 +4114,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return btree_read_extent_buffer_pages(fs_info, buf, parent_transid); } -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, - int read_only) +static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) { struct btrfs_super_block *sb = fs_info->super_copy; u64 nodesize = btrfs_super_nodesize(sb); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 44dcd9af6b7c..0be2d4fe705b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -52,14 +52,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, struct extent_buffer *btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr); -void clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *buf); +void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf); int open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); void close_ctree(struct btrfs_fs_info *fs_info); -int write_ctree_super(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, int max_mirrors); +int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num, struct buffer_head **bh_ret); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 340d90751263..87144c9f9593 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -30,7 +30,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, len = BTRFS_FID_SIZE_NON_CONNECTABLE; type = FILEID_BTRFS_WITHOUT_PARENT; - fid->objectid = btrfs_ino(inode); + fid->objectid = btrfs_ino(BTRFS_I(inode)); fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; @@ -166,13 +166,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child) if (!path) return ERR_PTR(-ENOMEM); - if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) { + if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) { key.objectid = root->root_key.objectid; key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = (u64)-1; root = fs_info->tree_root; } else { - key.objectid = btrfs_ino(dir); + key.objectid = btrfs_ino(BTRFS_I(dir)); key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; } @@ -235,13 +235,10 @@ static int btrfs_get_name(struct dentry *parent, char *name, int ret; u64 ino; - if (!dir || !inode) - return -EINVAL; - if (!S_ISDIR(dir->i_mode)) return -EINVAL; - ino = btrfs_ino(inode); + ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) @@ -255,7 +252,7 @@ static int btrfs_get_name(struct dentry *parent, char *name, root = fs_info->tree_root; } else { key.objectid = ino; - key.offset = btrfs_ino(dir); + key.offset = btrfs_ino(BTRFS_I(dir)); key.type = BTRFS_INODE_REF_KEY; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dcd2e798767e..c35b96633554 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -888,7 +888,7 @@ search_again: delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - head = btrfs_find_delayed_ref_head(trans, bytenr); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { atomic_inc(&head->node.refs); @@ -1035,10 +1035,11 @@ out_free: #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 owner, u32 extra_size) { + struct btrfs_root *root = fs_info->extent_root; struct btrfs_extent_item *item; struct btrfs_extent_item_v0 *ei0; struct btrfs_extent_ref_v0 *ref0; @@ -1092,7 +1093,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans, return ret; BUG_ON(ret); /* Corruption */ - btrfs_extend_item(root->fs_info, path, new_size); + btrfs_extend_item(fs_info, path, new_size); leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); @@ -1151,12 +1152,13 @@ static int match_extent_data_ref(struct extent_buffer *leaf, } static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset) { + struct btrfs_root *root = fs_info->extent_root; struct btrfs_key key; struct btrfs_extent_data_ref *ref; struct extent_buffer *leaf; @@ -1238,12 +1240,13 @@ fail: } static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add) { + struct btrfs_root *root = fs_info->extent_root; struct btrfs_key key; struct extent_buffer *leaf; u32 size; @@ -1317,7 +1320,7 @@ fail: } static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, int refs_to_drop, int *last_ref) { @@ -1354,7 +1357,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, num_refs -= refs_to_drop; if (num_refs == 0) { - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, fs_info->extent_root, path); *last_ref = 1; } else { if (key.type == BTRFS_EXTENT_DATA_REF_KEY) @@ -1416,11 +1419,12 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path, } static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid) { + struct btrfs_root *root = fs_info->extent_root; struct btrfs_key key; int ret; @@ -1449,7 +1453,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, } static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid) @@ -1466,7 +1470,8 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, key.offset = root_objectid; } - ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + ret = btrfs_insert_empty_item(trans, fs_info->extent_root, + path, &key, 0); btrfs_release_path(path); return ret; } @@ -1524,14 +1529,14 @@ static int find_next_key(struct btrfs_path *path, int level, */ static noinline_for_stack int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref **ref_ret, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset, int insert) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = fs_info->extent_root; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_extent_item *ei; @@ -1614,7 +1619,7 @@ again: err = -ENOENT; goto out; } - ret = convert_extent_item_v0(trans, root, path, owner, + ret = convert_extent_item_v0(trans, fs_info, path, owner, extra_size); if (ret < 0) { err = ret; @@ -1716,7 +1721,7 @@ out: * helper to add new inline back ref */ static noinline_for_stack -void setup_inline_extent_backref(struct btrfs_root *root, +void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, u64 parent, u64 root_objectid, @@ -1739,7 +1744,7 @@ void setup_inline_extent_backref(struct btrfs_root *root, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - btrfs_extend_item(root->fs_info, path, size); + btrfs_extend_item(fs_info, path, size); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); @@ -1777,7 +1782,7 @@ void setup_inline_extent_backref(struct btrfs_root *root, } static int lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref **ref_ret, u64 bytenr, u64 num_bytes, u64 parent, @@ -1785,7 +1790,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, { int ret; - ret = lookup_inline_extent_backref(trans, root, path, ref_ret, + ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret, bytenr, num_bytes, parent, root_objectid, owner, offset, 0); if (ret != -ENOENT) @@ -1795,11 +1800,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, *ref_ret = NULL; if (owner < BTRFS_FIRST_FREE_OBJECTID) { - ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, - root_objectid); + ret = lookup_tree_block_ref(trans, fs_info, path, bytenr, + parent, root_objectid); } else { - ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, - root_objectid, owner, offset); + ret = lookup_extent_data_ref(trans, fs_info, path, bytenr, + parent, root_objectid, owner, + offset); } return ret; } @@ -1808,7 +1814,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, * helper to update/remove inline back ref */ static noinline_for_stack -void update_inline_extent_backref(struct btrfs_root *root, +void update_inline_extent_backref(struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_mod, @@ -1866,14 +1872,14 @@ void update_inline_extent_backref(struct btrfs_root *root, memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - btrfs_truncate_item(root->fs_info, path, item_size, 1); + btrfs_truncate_item(fs_info, path, item_size, 1); } btrfs_mark_buffer_dirty(leaf); } static noinline_for_stack int insert_inline_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, @@ -1883,15 +1889,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_extent_inline_ref *iref; int ret; - ret = lookup_inline_extent_backref(trans, root, path, &iref, + ret = lookup_inline_extent_backref(trans, fs_info, path, &iref, bytenr, num_bytes, parent, root_objectid, owner, offset, 1); if (ret == 0) { BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); - update_inline_extent_backref(root, path, iref, + update_inline_extent_backref(fs_info, path, iref, refs_to_add, extent_op, NULL); } else if (ret == -ENOENT) { - setup_inline_extent_backref(root, path, iref, parent, + setup_inline_extent_backref(fs_info, path, iref, parent, root_objectid, owner, offset, refs_to_add, extent_op); ret = 0; @@ -1900,7 +1906,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, } static int insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add) @@ -1908,10 +1914,10 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans, int ret; if (owner < BTRFS_FIRST_FREE_OBJECTID) { BUG_ON(refs_to_add != 1); - ret = insert_tree_block_ref(trans, root, path, bytenr, + ret = insert_tree_block_ref(trans, fs_info, path, bytenr, parent, root_objectid); } else { - ret = insert_extent_data_ref(trans, root, path, bytenr, + ret = insert_extent_data_ref(trans, fs_info, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); } @@ -1919,7 +1925,7 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans, } static int remove_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_fs_info *fs_info, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_drop, int is_data, int *last_ref) @@ -1928,14 +1934,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, BUG_ON(!is_data && refs_to_drop != 1); if (iref) { - update_inline_extent_backref(root, path, iref, + update_inline_extent_backref(fs_info, path, iref, -refs_to_drop, NULL, last_ref); } else if (is_data) { - ret = remove_extent_data_ref(trans, root, path, refs_to_drop, + ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop, last_ref); } else { *last_ref = 1; - ret = btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, fs_info->extent_root, path); } return ret; } @@ -2089,7 +2095,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, 0, - BTRFS_ADD_DELAYED_REF, NULL); + BTRFS_ADD_DELAYED_REF); } return ret; } @@ -2117,9 +2123,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path->reada = READA_FORWARD; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ - ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, - bytenr, num_bytes, parent, - root_objectid, owner, offset, + ret = insert_inline_extent_backref(trans, fs_info, path, bytenr, + num_bytes, parent, root_objectid, + owner, offset, refs_to_add, extent_op); if ((ret < 0 && ret != -EAGAIN) || !ret) goto out; @@ -2143,9 +2149,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path->reada = READA_FORWARD; path->leave_spinning = 1; /* now insert the actual backref */ - ret = insert_extent_backref(trans, fs_info->extent_root, - path, bytenr, parent, root_objectid, - owner, offset, refs_to_add); + ret = insert_extent_backref(trans, fs_info, path, bytenr, parent, + root_objectid, owner, offset, refs_to_add); if (ret) btrfs_abort_transaction(trans, ret); out: @@ -2290,8 +2295,7 @@ again: item_size = btrfs_item_size_nr(leaf, path->slots[0]); #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { - ret = convert_extent_item_v0(trans, fs_info->extent_root, - path, (u64)-1, 0); + ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0); if (ret < 0) { err = ret; goto out; @@ -3028,8 +3032,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, return ret; } -static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static noinline int check_delayed_ref(struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 offset, u64 bytenr) { @@ -3037,11 +3040,16 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_data_ref *data_ref; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_transaction *cur_trans; int ret = 0; - delayed_refs = &trans->transaction->delayed_refs; + cur_trans = root->fs_info->running_transaction; + if (!cur_trans) + return 0; + + delayed_refs = &cur_trans->delayed_refs; spin_lock(&delayed_refs->lock); - head = btrfs_find_delayed_ref_head(trans, bytenr); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (!head) { spin_unlock(&delayed_refs->lock); return 0; @@ -3090,8 +3098,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, return ret; } -static noinline int check_committed_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static noinline int check_committed_ref(struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 offset, u64 bytenr) { @@ -3162,9 +3169,8 @@ out: return ret; } -int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 offset, u64 bytenr) +int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, + u64 bytenr) { struct btrfs_path *path; int ret; @@ -3175,12 +3181,12 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, return -ENOENT; do { - ret = check_committed_ref(trans, root, path, objectid, + ret = check_committed_ref(root, path, objectid, offset, bytenr); if (ret && ret != -ENOENT) goto out; - ret2 = check_delayed_ref(trans, root, path, objectid, + ret2 = check_delayed_ref(root, path, objectid, offset, bytenr); } while (ret2 == -EAGAIN); @@ -3368,7 +3374,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, if (trans->aborted) return 0; again: - inode = lookup_free_space_inode(root, block_group, path); + inode = lookup_free_space_inode(fs_info, block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); btrfs_release_path(path); @@ -3382,7 +3388,8 @@ again: if (block_group->ro) goto out_free; - ret = create_free_space_inode(root, trans, block_group, path); + ret = create_free_space_inode(fs_info, trans, block_group, + path); if (ret) goto out_free; goto again; @@ -3424,7 +3431,7 @@ again: if (ret) goto out_put; - ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); + ret = btrfs_truncate_free_space_cache(trans, NULL, inode); if (ret) goto out_put; } @@ -4119,6 +4126,15 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) return ret; } +static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, + bool may_use_included) +{ + ASSERT(s_info); + return s_info->bytes_used + s_info->bytes_reserved + + s_info->bytes_pinned + s_info->bytes_readonly + + (may_use_included ? s_info->bytes_may_use : 0); +} + int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; @@ -4144,9 +4160,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); - used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + - data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + - data_sinfo->bytes_may_use; + used = btrfs_space_info_used(data_sinfo, true); if (used + bytes > data_sinfo->total_bytes) { struct btrfs_trans_handle *trans; @@ -4421,9 +4435,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); spin_lock(&info->lock); - left = info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved - info->bytes_readonly - - info->bytes_may_use; + left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); num_devs = get_profile_num_devs(fs_info, type); @@ -4606,8 +4618,7 @@ static int can_overcommit(struct btrfs_root *root, return 0; profile = btrfs_get_alloc_profile(root, 0); - used = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly; + used = btrfs_space_info_used(space_info, false); /* * We only want to allow over committing if we have lots of actual space @@ -4787,11 +4798,10 @@ skip_async: * get us somewhere and then commit the transaction if it does. Otherwise it * will return -ENOSPC. */ -static int may_commit_transaction(struct btrfs_root *root, +static int may_commit_transaction(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, int force) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; struct btrfs_trans_handle *trans; @@ -4823,7 +4833,7 @@ static int may_commit_transaction(struct btrfs_root *root, spin_unlock(&delayed_rsv->lock); commit: - trans = btrfs_join_transaction(root); + trans = btrfs_join_transaction(fs_info->fs_root); if (IS_ERR(trans)) return -ENOSPC; @@ -4837,11 +4847,11 @@ struct reserve_ticket { wait_queue_head_t wait; }; -static int flush_space(struct btrfs_root *root, +static int flush_space(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 num_bytes, u64 orig_bytes, int state) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *root = fs_info->fs_root; struct btrfs_trans_handle *trans; int nr; int ret = 0; @@ -4881,7 +4891,8 @@ static int flush_space(struct btrfs_root *root, ret = 0; break; case COMMIT_TRANS: - ret = may_commit_transaction(root, space_info, orig_bytes, 0); + ret = may_commit_transaction(fs_info, space_info, + orig_bytes, 0); break; default: ret = -ENOSPC; @@ -4993,8 +5004,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) struct reserve_ticket *ticket; int ret; - ret = flush_space(fs_info->fs_root, space_info, to_reclaim, - to_reclaim, flush_state); + ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim, + flush_state); spin_lock(&space_info->lock); if (list_empty(&space_info->tickets)) { space_info->flush = 0; @@ -5049,8 +5060,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, spin_unlock(&space_info->lock); do { - flush_space(fs_info->fs_root, space_info, to_reclaim, - to_reclaim, flush_state); + flush_space(fs_info, space_info, to_reclaim, to_reclaim, + flush_state); flush_state++; spin_lock(&space_info->lock); if (ticket->bytes == 0) { @@ -5135,9 +5146,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root, spin_lock(&space_info->lock); ret = -ENOSPC; - used = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use; + used = btrfs_space_info_used(space_info, true); /* * If we have enough space then hooray, make our reservation and carry @@ -5630,9 +5639,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->size = min_t(u64, num_bytes, SZ_512M); if (block_rsv->reserved < block_rsv->size) { - num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + - sinfo->bytes_reserved + sinfo->bytes_readonly + - sinfo->bytes_may_use; + num_bytes = btrfs_space_info_used(sinfo, true); if (sinfo->total_bytes > num_bytes) { num_bytes = sinfo->total_bytes - num_bytes; num_bytes = min(num_bytes, @@ -5756,7 +5763,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(inode), num_bytes, 1); + btrfs_ino(BTRFS_I(inode)), num_bytes, 1); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } @@ -5767,7 +5774,7 @@ void btrfs_orphan_release_metadata(struct inode *inode) u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(inode), num_bytes, 0); + btrfs_ino(BTRFS_I(inode)), num_bytes, 0); btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes); } @@ -5799,7 +5806,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { /* One for parent inode, two for dir entries */ num_bytes = 3 * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta(root, num_bytes); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, true); if (ret) return ret; } else { @@ -5824,8 +5831,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, } void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv, - u64 qgroup_reserved) + struct btrfs_block_rsv *rsv) { btrfs_block_rsv_release(fs_info, rsv, (u64)-1); } @@ -5844,11 +5850,9 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) { unsigned drop_inode_space = 0; unsigned dropped_extents = 0; - unsigned num_extents = 0; + unsigned num_extents; - num_extents = (unsigned)div64_u64(num_bytes + - BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); + num_extents = count_max_extents(num_bytes); ASSERT(num_extents); ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents); BTRFS_I(inode)->outstanding_extents -= num_extents; @@ -5927,7 +5931,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; u64 to_reserve = 0; u64 csum_bytes; - unsigned nr_extents = 0; + unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; bool delalloc_lock = true; @@ -5960,9 +5964,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - nr_extents = (unsigned)div64_u64(num_bytes + - BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); + nr_extents = count_max_extents(num_bytes); BTRFS_I(inode)->outstanding_extents += nr_extents; nr_extents = 0; @@ -5979,7 +5981,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { ret = btrfs_qgroup_reserve_meta(root, - nr_extents * fs_info->nodesize); + nr_extents * fs_info->nodesize, true); if (ret) goto out_fail; } @@ -6005,7 +6007,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) if (to_reserve) trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), to_reserve, 1); + btrfs_ino(BTRFS_I(inode)), to_reserve, 1); if (release_extra) btrfs_block_rsv_release(fs_info, block_rsv, btrfs_calc_trans_metadata_size(fs_info, 1)); @@ -6068,7 +6070,7 @@ out_fail: if (to_free) { btrfs_block_rsv_release(fs_info, block_rsv, to_free); trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), to_free, 0); + btrfs_ino(BTRFS_I(inode)), to_free, 0); } if (delalloc_lock) mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); @@ -6104,7 +6106,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) return; trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), to_free, 0); + btrfs_ino(BTRFS_I(inode)), to_free, 0); btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); } @@ -6561,8 +6563,7 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, spin_unlock(&space_info->lock); return ret; } -void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) { struct btrfs_caching_control *next; struct btrfs_caching_control *caching_ctl; @@ -6845,7 +6846,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (is_data) skinny_metadata = 0; - ret = lookup_extent_backref(trans, extent_root, path, &iref, + ret = lookup_extent_backref(trans, info, path, &iref, bytenr, num_bytes, parent, root_objectid, owner_objectid, owner_offset); @@ -6877,8 +6878,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, #endif if (!found_extent) { BUG_ON(iref); - ret = remove_extent_backref(trans, extent_root, path, - NULL, refs_to_drop, + ret = remove_extent_backref(trans, info, path, NULL, + refs_to_drop, is_data, &last_ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -6953,8 +6954,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { BUG_ON(found_extent || extent_slot != path->slots[0]); - ret = convert_extent_item_v0(trans, extent_root, path, - owner_objectid, 0); + ret = convert_extent_item_v0(trans, info, path, owner_objectid, + 0); if (ret < 0) { btrfs_abort_transaction(trans, ret); goto out; @@ -7021,7 +7022,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); } if (found_extent) { - ret = remove_extent_backref(trans, extent_root, path, + ret = remove_extent_backref(trans, info, path, iref, refs_to_drop, is_data, &last_ref); if (ret) { @@ -7095,7 +7096,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - head = btrfs_find_delayed_ref_head(trans, bytenr); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (!head) goto out_delayed_unlock; @@ -7244,7 +7245,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, num_bytes, parent, root_objectid, owner, offset, 0, - BTRFS_DROP_DELAYED_REF, NULL); + BTRFS_DROP_DELAYED_REF); } return ret; } @@ -7419,12 +7420,11 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache, * If there is no suitable free space, we will record the max size of * the free space extent currently. */ -static noinline int find_free_extent(struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_fs_info *fs_info, u64 ram_bytes, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags, int delalloc) { - struct btrfs_fs_info *fs_info = orig_root->fs_info; int ret = 0; struct btrfs_root *root = fs_info->extent_root; struct btrfs_free_cluster *last_ptr = NULL; @@ -7716,18 +7716,20 @@ unclustered_alloc: last_ptr->fragmented = 1; spin_unlock(&last_ptr->lock); } - spin_lock(&block_group->free_space_ctl->tree_lock); - if (cached && - block_group->free_space_ctl->free_space < - num_bytes + empty_cluster + empty_size) { - if (block_group->free_space_ctl->free_space > - max_extent_size) - max_extent_size = - block_group->free_space_ctl->free_space; - spin_unlock(&block_group->free_space_ctl->tree_lock); - goto loop; + if (cached) { + struct btrfs_free_space_ctl *ctl = + block_group->free_space_ctl; + + spin_lock(&ctl->tree_lock); + if (ctl->free_space < + num_bytes + empty_cluster + empty_size) { + if (ctl->free_space > max_extent_size) + max_extent_size = ctl->free_space; + spin_unlock(&ctl->tree_lock); + goto loop; + } + spin_unlock(&ctl->tree_lock); } - spin_unlock(&block_group->free_space_ctl->tree_lock); offset = btrfs_find_space_for_alloc(block_group, search_start, num_bytes, empty_size, @@ -7908,9 +7910,8 @@ static void dump_space_info(struct btrfs_fs_info *fs_info, spin_lock(&info->lock); btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", info->flags, - info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved - info->bytes_readonly - - info->bytes_may_use, (info->full) ? "" : "not "); + info->total_bytes - btrfs_space_info_used(info, true), + info->full ? "" : "not "); btrfs_info(fs_info, "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", info->total_bytes, info->bytes_used, info->bytes_pinned, @@ -7951,7 +7952,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < fs_info->sectorsize); - ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, + ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size, hint_byte, ins, flags, delalloc); if (!ret && !is_data) { btrfs_dec_block_group_reservations(fs_info, ins->objectid); @@ -8194,8 +8195,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, ins->offset, 0, root_objectid, owner, offset, - ram_bytes, BTRFS_ADD_DELAYED_EXTENT, - NULL); + ram_bytes, BTRFS_ADD_DELAYED_EXTENT); return ret; } @@ -8256,7 +8256,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); - clean_tree_block(trans, fs_info, buf); + clean_tree_block(fs_info, buf); clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); btrfs_set_lock_blocking(buf); @@ -8351,10 +8351,11 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, * returns the tree buffer or an ERR_PTR on error. */ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 parent, u64 root_objectid, - struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size) + struct btrfs_root *root, + u64 parent, u64 root_objectid, + const struct btrfs_disk_key *key, + int level, u64 hint, + u64 empty_size) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key ins; @@ -8876,7 +8877,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking(eb); path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; } - clean_tree_block(trans, fs_info, eb); + clean_tree_block(fs_info, eb); } if (eb == root->node) { @@ -9346,8 +9347,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force) num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); - if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + - sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + + if (btrfs_space_info_used(sinfo, true) + num_bytes + min_allocable_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; cache->ro++; @@ -9360,17 +9360,16 @@ out: return ret; } -int btrfs_inc_block_group_ro(struct btrfs_root *root, +int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *cache) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; u64 alloc_flags; int ret; again: - trans = btrfs_join_transaction(root); + trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -9557,9 +9556,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) * all of the extents from this block group. If we can, we're good */ if ((space_info->total_bytes != block_group->key.offset) && - (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - min_free < space_info->total_bytes)) { + (btrfs_space_info_used(space_info, false) + min_free < + space_info->total_bytes)) { spin_unlock(&space_info->lock); goto out; } @@ -10317,7 +10315,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * get the inode first so any iput calls done for the io_list * aren't the final iput (no unlinks allowed now) */ - inode = lookup_free_space_inode(tree_root, block_group, path); + inode = lookup_free_space_inode(fs_info, block_group, path); mutex_lock(&trans->transaction->cache_write_mutex); /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4ac383a3a649..d15b5ddb6732 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -98,7 +98,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, "%s: ino %llu isize %llu odd range [%llu,%llu]", - caller, btrfs_ino(inode), isize, start, end); + caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); } } #else @@ -144,7 +144,7 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits, if (!set && (state->state & bits) == 0) return; changeset->bytes_changed += state->end - state->start + 1; - ret = ulist_add(changeset->range_changed, state->start, state->end, + ret = ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC); /* ENOMEM */ BUG_ON(ret < 0); @@ -226,6 +226,11 @@ static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; + /* + * The given mask might be not appropriate for the slab allocator, + * drop the unsupported bits + */ + mask &= ~(__GFP_DMA32|__GFP_HIGHMEM); state = kmem_cache_alloc(extent_state_cache, mask); if (!state) return state; @@ -1549,33 +1554,24 @@ out: return found; } +static int __process_pages_contig(struct address_space *mapping, + struct page *locked_page, + pgoff_t start_index, pgoff_t end_index, + unsigned long page_ops, pgoff_t *index_ret); + static noinline void __unlock_for_delalloc(struct inode *inode, struct page *locked_page, u64 start, u64 end) { - int ret; - struct page *pages[16]; unsigned long index = start >> PAGE_SHIFT; unsigned long end_index = end >> PAGE_SHIFT; - unsigned long nr_pages = end_index - index + 1; - int i; + ASSERT(locked_page); if (index == locked_page->index && end_index == index) return; - while (nr_pages > 0) { - ret = find_get_pages_contig(inode->i_mapping, index, - min_t(unsigned long, nr_pages, - ARRAY_SIZE(pages)), pages); - for (i = 0; i < ret; i++) { - if (pages[i] != locked_page) - unlock_page(pages[i]); - put_page(pages[i]); - } - nr_pages -= ret; - index += ret; - cond_resched(); - } + __process_pages_contig(inode->i_mapping, locked_page, index, end_index, + PAGE_UNLOCK, NULL); } static noinline int lock_delalloc_pages(struct inode *inode, @@ -1584,59 +1580,19 @@ static noinline int lock_delalloc_pages(struct inode *inode, u64 delalloc_end) { unsigned long index = delalloc_start >> PAGE_SHIFT; - unsigned long start_index = index; + unsigned long index_ret = index; unsigned long end_index = delalloc_end >> PAGE_SHIFT; - unsigned long pages_locked = 0; - struct page *pages[16]; - unsigned long nrpages; int ret; - int i; - /* the caller is responsible for locking the start index */ + ASSERT(locked_page); if (index == locked_page->index && index == end_index) return 0; - /* skip the page at the start index */ - nrpages = end_index - index + 1; - while (nrpages > 0) { - ret = find_get_pages_contig(inode->i_mapping, index, - min_t(unsigned long, - nrpages, ARRAY_SIZE(pages)), pages); - if (ret == 0) { - ret = -EAGAIN; - goto done; - } - /* now we have an array of pages, lock them all */ - for (i = 0; i < ret; i++) { - /* - * the caller is taking responsibility for - * locked_page - */ - if (pages[i] != locked_page) { - lock_page(pages[i]); - if (!PageDirty(pages[i]) || - pages[i]->mapping != inode->i_mapping) { - ret = -EAGAIN; - unlock_page(pages[i]); - put_page(pages[i]); - goto done; - } - } - put_page(pages[i]); - pages_locked++; - } - nrpages -= ret; - index += ret; - cond_resched(); - } - ret = 0; -done: - if (ret && pages_locked) { - __unlock_for_delalloc(inode, locked_page, - delalloc_start, - ((u64)(start_index + pages_locked - 1)) << - PAGE_SHIFT); - } + ret = __process_pages_contig(inode->i_mapping, locked_page, index, + end_index, PAGE_LOCK, &index_ret); + if (ret == -EAGAIN) + __unlock_for_delalloc(inode, locked_page, delalloc_start, + (u64)index_ret << PAGE_SHIFT); return ret; } @@ -1726,37 +1682,47 @@ out_failed: return found; } -void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, - u64 delalloc_end, struct page *locked_page, - unsigned clear_bits, - unsigned long page_ops) +static int __process_pages_contig(struct address_space *mapping, + struct page *locked_page, + pgoff_t start_index, pgoff_t end_index, + unsigned long page_ops, pgoff_t *index_ret) { - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - int ret; + unsigned long nr_pages = end_index - start_index + 1; + unsigned long pages_locked = 0; + pgoff_t index = start_index; struct page *pages[16]; - unsigned long index = start >> PAGE_SHIFT; - unsigned long end_index = end >> PAGE_SHIFT; - unsigned long nr_pages = end_index - index + 1; + unsigned ret; + int err = 0; int i; - clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (page_ops == 0) - return; + if (page_ops & PAGE_LOCK) { + ASSERT(page_ops == PAGE_LOCK); + ASSERT(index_ret && *index_ret == start_index); + } if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0) - mapping_set_error(inode->i_mapping, -EIO); + mapping_set_error(mapping, -EIO); while (nr_pages > 0) { - ret = find_get_pages_contig(inode->i_mapping, index, + ret = find_get_pages_contig(mapping, index, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); - for (i = 0; i < ret; i++) { + if (ret == 0) { + /* + * Only if we're going to lock these pages, + * can we find nothing at @index. + */ + ASSERT(page_ops & PAGE_LOCK); + return ret; + } + for (i = 0; i < ret; i++) { if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { put_page(pages[i]); + pages_locked++; continue; } if (page_ops & PAGE_CLEAR_DIRTY) @@ -1769,12 +1735,40 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, end_page_writeback(pages[i]); if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); + if (page_ops & PAGE_LOCK) { + lock_page(pages[i]); + if (!PageDirty(pages[i]) || + pages[i]->mapping != mapping) { + unlock_page(pages[i]); + put_page(pages[i]); + err = -EAGAIN; + goto out; + } + } put_page(pages[i]); + pages_locked++; } nr_pages -= ret; index += ret; cond_resched(); } +out: + if (err && index_ret) + *index_ret = start_index + pages_locked - 1; + return err; +} + +void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + u64 delalloc_end, struct page *locked_page, + unsigned clear_bits, + unsigned long page_ops) +{ + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0, + NULL, GFP_NOFS); + + __process_pages_contig(inode->i_mapping, locked_page, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + page_ops, NULL); } /* @@ -2060,7 +2054,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, btrfs_info_rl_in_rcu(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", - btrfs_ino(inode), start, + btrfs_ino(BTRFS_I(inode)), start, rcu_str_deref(dev->name), sector); btrfs_bio_counter_dec(fs_info); bio_put(bio); @@ -2765,7 +2759,6 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, size_t size, unsigned long offset, struct block_device *bdev, struct bio **bio_ret, - unsigned long max_pages, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, @@ -2931,7 +2924,6 @@ static int __do_readpage(struct extent_io_tree *tree, } } while (cur <= end) { - unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1; bool force_bio_submit = false; if (cur >= last_byte) { @@ -3066,10 +3058,9 @@ static int __do_readpage(struct extent_io_tree *tree, continue; } - pnr -= page->index; ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL, page, sector, disk_io_size, pg_offset, - bdev, bio, pnr, + bdev, bio, end_bio_extent_readpage, mirror_num, *bio_flags, this_bio_flag, @@ -3210,7 +3201,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } -static void update_nr_written(struct page *page, struct writeback_control *wbc, +static void update_nr_written(struct writeback_control *wbc, unsigned long nr_written) { wbc->nr_to_write -= nr_written; @@ -3330,7 +3321,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, u64 block_start; u64 iosize; sector_t sector; - struct extent_state *cached_state = NULL; struct extent_map *em; struct block_device *bdev; size_t pg_offset = 0; @@ -3349,10 +3339,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, else redirty_page_for_writepage(wbc, page); - update_nr_written(page, wbc, nr_written); + update_nr_written(wbc, nr_written); unlock_page(page); - ret = 1; - goto done_unlocked; + return 1; } } @@ -3360,7 +3349,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, * we don't want to touch the inode after unlocking the page, * so we update the mapping writeback index now */ - update_nr_written(page, wbc, nr_written + 1); + update_nr_written(wbc, nr_written + 1); end = page_end; if (i_size <= start) { @@ -3374,7 +3363,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, while (cur <= end) { u64 em_end; - unsigned long max_nr; if (cur >= i_size) { if (tree->ops && tree->ops->writepage_end_io_hook) @@ -3431,8 +3419,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, continue; } - max_nr = (i_size >> PAGE_SHIFT) + 1; - set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(BTRFS_I(inode)->root->fs_info, @@ -3442,11 +3428,14 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, page, sector, iosize, pg_offset, - bdev, &epd->bio, max_nr, + bdev, &epd->bio, end_bio_extent_writepage, 0, 0, 0, false); - if (ret) + if (ret) { SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + } cur = cur + iosize; pg_offset += iosize; @@ -3454,11 +3443,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, } done: *nr_ret = nr; - -done_unlocked: - - /* drop our reference on any cached states */ - free_extent_state(cached_state); return ret; } @@ -3761,20 +3745,21 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, set_page_writeback(p); ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, p, offset >> 9, PAGE_SIZE, 0, bdev, - &epd->bio, -1, + &epd->bio, end_bio_extent_buffer_writepage, 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); - end_page_writeback(p); + if (PageWriteback(p)) + end_page_writeback(p); if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) end_extent_buffer_writeback(eb); ret = -EIO; break; } offset += PAGE_SIZE; - update_nr_written(p, wbc, 1); + update_nr_written(wbc, 1); unlock_page(p); } @@ -3926,8 +3911,7 @@ retry: * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. */ -static int extent_write_cache_pages(struct extent_io_tree *tree, - struct address_space *mapping, +static int extent_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data, void (*flush_fn)(void *)) @@ -4168,8 +4152,7 @@ int extent_writepages(struct extent_io_tree *tree, .bio_flags = 0, }; - ret = extent_write_cache_pages(tree, mapping, wbc, - __extent_writepage, &epd, + ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, flush_write_bio); flush_epd_write_bio(&epd); return ret; @@ -4264,8 +4247,6 @@ static int try_release_extent_state(struct extent_map_tree *map, EXTENT_IOBITS, 0, NULL)) ret = 0; else { - if ((mask & GFP_NOFS) == GFP_NOFS) - mask = GFP_NOFS; /* * at this point we can safely clear everything except the * locked bit and the nodatasum bit @@ -4410,8 +4391,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * lookup the last file extent. We're not using i_size here * because there might be preallocation past i_size */ - ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1, - 0); + ret = btrfs_lookup_file_extent(NULL, root, path, + btrfs_ino(BTRFS_I(inode)), -1, 0); if (ret < 0) { btrfs_free_path(path); return ret; @@ -4426,7 +4407,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, found_type = found_key.type; /* No extents, but there might be delalloc bits */ - if (found_key.objectid != btrfs_ino(inode) || + if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) || found_type != BTRFS_EXTENT_DATA_KEY) { /* have to trust i_size as the end */ last = (u64)-1; @@ -4535,8 +4516,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * lookup stuff. */ ret = btrfs_check_shared(trans, root->fs_info, - root->objectid, - btrfs_ino(inode), bytenr); + root->objectid, + btrfs_ino(BTRFS_I(inode)), bytenr); if (trans) btrfs_end_transaction(trans); if (ret < 0) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 17f9ce479ed7..270d03be290e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -45,13 +45,14 @@ #define EXTENT_BUFFER_IN_TREE 10 #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ -/* these are flags for extent_clear_unlock_delalloc */ +/* these are flags for __process_pages_contig */ #define PAGE_UNLOCK (1 << 0) #define PAGE_CLEAR_DIRTY (1 << 1) #define PAGE_SET_WRITEBACK (1 << 2) #define PAGE_END_WRITEBACK (1 << 3) #define PAGE_SET_PRIVATE2 (1 << 4) #define PAGE_SET_ERROR (1 << 5) +#define PAGE_LOCK (1 << 6) /* * page->private values. Every page that is controlled by the extent @@ -192,7 +193,7 @@ struct extent_changeset { u64 bytes_changed; /* Changed ranges */ - struct ulist *range_changed; + struct ulist range_changed; }; static inline void extent_set_compress_type(unsigned long *bio_flags, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e97e322c28f0..f7b9a92ad56d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -255,7 +255,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, } else { btrfs_info_rl(fs_info, "no csum found for inode %llu start %llu", - btrfs_ino(inode), offset); + btrfs_ino(BTRFS_I(inode)), offset); } item = NULL; btrfs_release_path(path); @@ -856,8 +856,8 @@ insert: tmp = min(tmp, (next_offset - file_key.offset) >> fs_info->sb->s_blocksize_bits); - tmp = max((u64)1, tmp); - tmp = min(tmp, (u64)MAX_CSUM_ITEMS(fs_info, csum_size)); + tmp = max_t(u64, 1, tmp); + tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size)); ins_size = csum_size * tmp; } else { ins_size = csum_size; @@ -977,7 +977,7 @@ void btrfs_extent_item_to_extent_map(struct inode *inode, } else { btrfs_err(fs_info, "unknown file extent item type %d, inode %llu, offset %llu, root %llu", - type, btrfs_ino(inode), extent_start, + type, btrfs_ino(BTRFS_I(inode)), extent_start, root->root_key.objectid); } } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b5c5da215d05..c1d2a07205da 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -168,7 +168,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!defrag) return -ENOMEM; - defrag->ino = btrfs_ino(inode); + defrag->ino = btrfs_ino(BTRFS_I(inode)); defrag->transid = transid; defrag->root = root->root_key.objectid; @@ -702,7 +702,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *fi; struct btrfs_key key; struct btrfs_key new_key; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); u64 search_start = start; u64 disk_bytenr = 0; u64 num_bytes = 0; @@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int del_slot = 0; int recow; int ret; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) @@ -2062,7 +2062,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * commit does not start nor waits for ordered extents to complete. */ smp_mb(); - if (btrfs_inode_in_log(inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || (full_sync && BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) || (!btrfs_have_ordered_extents_in_range(inode, start, len) && @@ -2203,7 +2203,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, return 0; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != btrfs_ino(inode) || + if (key.objectid != btrfs_ino(BTRFS_I(inode)) || key.type != BTRFS_EXTENT_DATA_KEY) return 0; @@ -2237,7 +2237,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, if (btrfs_fs_incompat(fs_info, NO_HOLES)) goto out; - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; @@ -2285,9 +2285,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } btrfs_release_path(path); - ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, - 0, 0, end - offset, 0, end - offset, - 0, 0, 0); + ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)), + offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0); if (ret) return ret; @@ -2876,7 +2875,7 @@ static long btrfs_fallocate(struct file *file, int mode, if (!ret) ret = btrfs_prealloc_file_range(inode, mode, range->start, - range->len, 1 << inode->i_blkbits, + range->len, i_blocksize(inode), offset + len, &alloc_hint); else btrfs_free_reserved_data_space(inode, range->start, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 7015892c9ee8..1a131f7d6c1b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -94,12 +94,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, return inode; } -struct inode *lookup_free_space_inode(struct btrfs_root *root, +struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { struct inode *inode = NULL; - struct btrfs_fs_info *fs_info = root->fs_info; u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; spin_lock(&block_group->lock); @@ -109,7 +108,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, if (inode) return inode; - inode = __lookup_free_space_inode(root, path, + inode = __lookup_free_space_inode(fs_info->tree_root, path, block_group->key.objectid); if (IS_ERR(inode)) return inode; @@ -192,7 +191,7 @@ static int __create_free_space_inode(struct btrfs_root *root, return 0; } -int create_free_space_inode(struct btrfs_root *root, +int create_free_space_inode(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) @@ -200,11 +199,11 @@ int create_free_space_inode(struct btrfs_root *root, int ret; u64 ino; - ret = btrfs_find_free_objectid(root, &ino); + ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); if (ret < 0) return ret; - return __create_free_space_inode(root, trans, path, ino, + return __create_free_space_inode(fs_info->tree_root, trans, path, ino, block_group->key.objectid); } @@ -227,21 +226,21 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, return ret; } -int btrfs_truncate_free_space_cache(struct btrfs_root *root, - struct btrfs_trans_handle *trans, +int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct inode *inode) { + struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - struct btrfs_path *path = btrfs_alloc_path(); bool locked = false; - if (!path) { - ret = -ENOMEM; - goto fail; - } - if (block_group) { + struct btrfs_path *path = btrfs_alloc_path(); + + if (!path) { + ret = -ENOMEM; + goto fail; + } locked = true; mutex_lock(&trans->transaction->cache_write_mutex); if (!list_empty(&block_group->io_list)) { @@ -258,8 +257,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_CLEAR; spin_unlock(&block_group->lock); + btrfs_free_path(path); } - btrfs_free_path(path); btrfs_i_size_write(inode, 0); truncate_pagecache(inode, 0); @@ -286,14 +285,14 @@ fail: return ret; } -static int readahead_cache(struct inode *inode) +static void readahead_cache(struct inode *inode) { struct file_ra_state *ra; unsigned long last_index; ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) - return -ENOMEM; + return; file_ra_state_init(ra, inode->i_mapping); last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; @@ -301,8 +300,6 @@ static int readahead_cache(struct inode *inode) page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); kfree(ra); - - return 0; } static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, @@ -313,7 +310,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) + if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) check_crcs = 1; /* Make sure we can fit our crcs into the first page */ @@ -730,9 +727,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (ret) return ret; - ret = readahead_cache(inode); - if (ret) - goto out; + readahead_cache(inode); ret = io_ctl_prepare_pages(&io_ctl, inode, 1); if (ret) @@ -828,7 +823,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - struct btrfs_root *root = fs_info->tree_root; struct inode *inode; struct btrfs_path *path; int ret = 0; @@ -852,7 +846,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, path->search_commit_root = 1; path->skip_locking = 1; - inode = lookup_free_space_inode(root, block_group, path); + inode = lookup_free_space_inode(fs_info, block_group, path); if (IS_ERR(inode)) { btrfs_free_path(path); return 0; @@ -1128,8 +1122,7 @@ cleanup_bitmap_list(struct list_head *bitmap_list) static void noinline_for_stack cleanup_write_cache_enospc(struct inode *inode, struct btrfs_io_ctl *io_ctl, - struct extent_state **cached_state, - struct list_head *bitmap_list) + struct extent_state **cached_state) { io_ctl_drop_pages(io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, @@ -1225,8 +1218,6 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, * @ctl - the free space cache we are going to write out * @block_group - the block_group for this cache if it belongs to a block_group * @trans - the trans handle - * @path - the path to use - * @offset - the offset for the key we'll insert * * This function writes out a free space cache struct to disk for quick recovery * on mount. This will return 0 if it was successful in writing the cache out, @@ -1236,8 +1227,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_block_group_cache *block_group, struct btrfs_io_ctl *io_ctl, - struct btrfs_trans_handle *trans, - struct btrfs_path *path, u64 offset) + struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_state *cached_state = NULL; @@ -1365,7 +1355,7 @@ out_nospc_locked: mutex_unlock(&ctl->cache_writeout_mutex); out_nospc: - cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list); + cleanup_write_cache_enospc(inode, io_ctl, &cached_state); if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) up_write(&block_group->data_rwsem); @@ -1378,7 +1368,6 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { - struct btrfs_root *root = fs_info->tree_root; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct inode *inode; int ret = 0; @@ -1390,13 +1379,12 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, } spin_unlock(&block_group->lock); - inode = lookup_free_space_inode(root, block_group, path); + inode = lookup_free_space_inode(fs_info, block_group, path); if (IS_ERR(inode)) return 0; - ret = __btrfs_write_out_cache(root, inode, ctl, block_group, - &block_group->io_ctl, trans, - path, block_group->key.objectid); + ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, + block_group, &block_group->io_ctl, trans); if (ret) { #ifdef DEBUG btrfs_err(fs_info, @@ -3543,8 +3531,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, return 0; memset(&io_ctl, 0, sizeof(io_ctl)); - ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, - trans, path, 0); + ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); if (!ret) { /* * At this point writepages() didn't error out, so our metadata diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 6f3c025a2c6c..79eca4cabb1c 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -51,18 +51,17 @@ struct btrfs_free_space_op { struct btrfs_io_ctl; -struct inode *lookup_free_space_inode(struct btrfs_root *root, +struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); -int create_free_space_inode(struct btrfs_root *root, +int create_free_space_inode(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct btrfs_path *path); int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -int btrfs_truncate_free_space_cache(struct btrfs_root *root, - struct btrfs_trans_handle *trans, +int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group, struct inode *inode); int load_free_space_cache(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index ff0c55337c2e..dd7fb22a955a 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1269,7 +1269,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) list_del(&free_space_root->dirty_list); btrfs_tree_lock(free_space_root->node); - clean_tree_block(trans, fs_info, free_space_root->node); + clean_tree_block(fs_info, free_space_root->node); btrfs_tree_unlock(free_space_root->node); btrfs_free_tree_block(trans, free_space_root, free_space_root->node, 0, 1); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 144b119ff43f..3bbb8f095953 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -467,7 +467,7 @@ again: } if (i_size_read(inode) > 0) { - ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); + ret = btrfs_truncate_free_space_cache(trans, NULL, inode); if (ret) { if (ret != -ENOSPC) btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e861a063721..f02823f088c2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -71,6 +71,7 @@ struct btrfs_dio_data { u64 reserve; u64 unsubmitted_oe_range_start; u64 unsubmitted_oe_range_end; + int overwrite; }; static const struct inode_operations btrfs_dir_inode_operations; @@ -108,11 +109,11 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, u64 delalloc_end, int *page_started, unsigned long *nr_written, int unlock, struct btrfs_dedupe_hash *hash); -static struct extent_map *create_pinned_em(struct inode *inode, u64 start, - u64 len, u64 orig_start, - u64 block_start, u64 block_len, - u64 orig_block_len, u64 ram_bytes, - int type); +static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, + u64 orig_start, u64 block_start, + u64 block_len, u64 orig_block_len, + u64 ram_bytes, int compress_type, + int type); static int btrfs_dirty_inode(struct inode *inode); @@ -166,7 +167,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; size_t datasize; - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.offset = start; key.type = BTRFS_EXTENT_DATA_KEY; @@ -388,6 +389,15 @@ static inline int inode_need_compress(struct inode *inode) return 0; } +static inline void inode_should_defrag(struct inode *inode, + u64 start, u64 end, u64 num_bytes, u64 small_write) +{ + /* If this is a small write inside eof, kick off a defrag */ + if (num_bytes < small_write && + (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + btrfs_add_inode_defrag(NULL, inode); +} + /* * we create compressed extents in two phases. The first * phase compresses a range of pages that have already been @@ -430,10 +440,7 @@ static noinline void compress_file_range(struct inode *inode, int compress_type = fs_info->compress_type; int redirty = 0; - /* if this is a small write inside eof, kick off a defrag */ - if ((end - start + 1) < SZ_16K && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) - btrfs_add_inode_defrag(NULL, inode); + inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); actual_end = min_t(u64, isize, end + 1); again: @@ -541,7 +548,7 @@ cont: * to make an uncompressed inline extent. */ ret = cow_file_range_inline(root, inode, start, end, - 0, 0, NULL); + 0, BTRFS_COMPRESS_NONE, NULL); } else { /* try making a compressed inline extent */ ret = cow_file_range_inline(root, inode, start, end, @@ -690,7 +697,6 @@ static noinline void submit_compressed_extents(struct inode *inode, struct btrfs_key ins; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree; int ret = 0; @@ -778,46 +784,19 @@ retry: * here we're doing allocation and writeback of the * compressed pages */ - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); - - em = alloc_extent_map(); - if (!em) { - ret = -ENOMEM; - goto out_free_reserve; - } - em->start = async_extent->start; - em->len = async_extent->ram_size; - em->orig_start = em->start; - em->mod_start = em->start; - em->mod_len = em->len; - - em->block_start = ins.objectid; - em->block_len = ins.offset; - em->orig_block_len = ins.offset; - em->ram_bytes = async_extent->ram_size; - em->bdev = fs_info->fs_devices->latest_bdev; - em->compress_type = async_extent->compress_type; - set_bit(EXTENT_FLAG_PINNED, &em->flags); - set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); - em->generation = -1; - - while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 1); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); - } - - if (ret) + em = create_io_em(inode, async_extent->start, + async_extent->ram_size, /* len */ + async_extent->start, /* orig_start */ + ins.objectid, /* block_start */ + ins.offset, /* block_len */ + ins.offset, /* orig_block_len */ + async_extent->ram_size, /* ram_bytes */ + async_extent->compress_type, + BTRFS_ORDERED_COMPRESSED); + if (IS_ERR(em)) + /* ret value is not necessary due to void function */ goto out_free_reserve; + free_extent_map(em); ret = btrfs_add_ordered_extent_compress(inode, async_extent->start, @@ -952,7 +931,6 @@ static noinline int cow_file_range(struct inode *inode, u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; if (btrfs_is_free_space_inode(inode)) { @@ -965,15 +943,12 @@ static noinline int cow_file_range(struct inode *inode, num_bytes = max(blocksize, num_bytes); disk_num_bytes = num_bytes; - /* if this is a small write inside eof, kick off defrag */ - if (num_bytes < SZ_64K && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) - btrfs_add_inode_defrag(NULL, inode); + inode_should_defrag(inode, start, end, num_bytes, SZ_64K); if (start == 0) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(root, inode, start, end, 0, 0, - NULL); + ret = cow_file_range_inline(root, inode, start, end, 0, + BTRFS_COMPRESS_NONE, NULL); if (ret == 0) { extent_clear_unlock_delalloc(inode, start, end, delalloc_end, NULL, @@ -1008,39 +983,18 @@ static noinline int cow_file_range(struct inode *inode, if (ret < 0) goto out_unlock; - em = alloc_extent_map(); - if (!em) { - ret = -ENOMEM; - goto out_reserve; - } - em->start = start; - em->orig_start = em->start; ram_size = ins.offset; - em->len = ins.offset; - em->mod_start = em->start; - em->mod_len = em->len; - - em->block_start = ins.objectid; - em->block_len = ins.offset; - em->orig_block_len = ins.offset; - em->ram_bytes = ram_size; - em->bdev = fs_info->fs_devices->latest_bdev; - set_bit(EXTENT_FLAG_PINNED, &em->flags); - em->generation = -1; - - while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 1); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, start, - start + ram_size - 1, 0); - } - if (ret) + em = create_io_em(inode, start, ins.offset, /* len */ + start, /* orig_start */ + ins.objectid, /* block_start */ + ins.offset, /* block_len */ + ins.offset, /* orig_block_len */ + ram_size, /* ram_bytes */ + BTRFS_COMPRESS_NONE, /* compress_type */ + BTRFS_ORDERED_REGULAR /* type */); + if (IS_ERR(em)) goto out_reserve; + free_extent_map(em); cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, @@ -1164,7 +1118,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long nr_pages; u64 cur_end; - int limit = 10 * SZ_1M; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); @@ -1196,12 +1149,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); - if (atomic_read(&fs_info->async_delalloc_pages) > limit) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->async_delalloc_pages) < - limit)); - } - while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->async_delalloc_pages)) { wait_event(fs_info->async_submit_wait, @@ -1250,11 +1197,11 @@ static noinline int run_delalloc_nocow(struct inode *inode, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct extent_buffer *leaf; struct btrfs_path *path; struct btrfs_file_extent_item *fi; struct btrfs_key found_key; + struct extent_map *em; u64 cow_start; u64 cur_offset; u64 extent_end; @@ -1269,7 +1216,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, int nocow; int check_prev = 1; bool nolock; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) { @@ -1286,30 +1233,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, nolock = btrfs_is_free_space_inode(inode); - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - - if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, start, end, end, - locked_page, - EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, PAGE_UNLOCK | - PAGE_CLEAR_DIRTY | - PAGE_SET_WRITEBACK | - PAGE_END_WRITEBACK); - btrfs_free_path(path); - return PTR_ERR(trans); - } - - trans->block_rsv = &fs_info->delalloc_block_rsv; - cow_start = (u64)-1; cur_offset = start; while (1) { - ret = btrfs_lookup_file_extent(trans, root, path, ino, + ret = btrfs_lookup_file_extent(NULL, root, path, ino, cur_offset, 0); if (ret < 0) goto error; @@ -1382,7 +1309,7 @@ next_slot: goto out_check; if (btrfs_extent_readonly(fs_info, disk_bytenr)) goto out_check; - if (btrfs_cross_ref_exist(trans, root, ino, + if (btrfs_cross_ref_exist(root, ino, found_key.offset - extent_offset, disk_bytenr)) goto out_check; @@ -1455,35 +1382,28 @@ out_check: } if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { - struct extent_map *em; - struct extent_map_tree *em_tree; - em_tree = &BTRFS_I(inode)->extent_tree; - em = alloc_extent_map(); - BUG_ON(!em); /* -ENOMEM */ - em->start = cur_offset; - em->orig_start = found_key.offset - extent_offset; - em->len = num_bytes; - em->block_len = num_bytes; - em->block_start = disk_bytenr; - em->orig_block_len = disk_num_bytes; - em->ram_bytes = ram_bytes; - em->bdev = fs_info->fs_devices->latest_bdev; - em->mod_start = em->start; - em->mod_len = em->len; - set_bit(EXTENT_FLAG_PINNED, &em->flags); - set_bit(EXTENT_FLAG_FILLING, &em->flags); - em->generation = -1; - while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 1); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, em->start, - em->start + em->len - 1, 0); + u64 orig_start = found_key.offset - extent_offset; + + em = create_io_em(inode, cur_offset, num_bytes, + orig_start, + disk_bytenr, /* block_start */ + num_bytes, /* block_len */ + disk_num_bytes, /* orig_block_len */ + ram_bytes, BTRFS_COMPRESS_NONE, + BTRFS_ORDERED_PREALLOC); + if (IS_ERR(em)) { + if (!nolock && nocow) + btrfs_end_write_no_snapshoting(root); + if (nocow) + btrfs_dec_nocow_writers(fs_info, + disk_bytenr); + ret = PTR_ERR(em); + goto error; } + free_extent_map(em); + } + + if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { type = BTRFS_ORDERED_PREALLOC; } else { type = BTRFS_ORDERED_NOCOW; @@ -1534,10 +1454,6 @@ out_check: } error: - err = btrfs_end_transaction(trans); - if (!ret) - ret = err; - if (ret && cur_offset < end) extent_clear_unlock_delalloc(inode, cur_offset, end, end, locked_page, EXTENT_LOCKED | @@ -1609,7 +1525,7 @@ static void btrfs_split_extent_hook(struct inode *inode, size = orig->end - orig->start + 1; if (size > BTRFS_MAX_EXTENT_SIZE) { - u64 num_extents; + u32 num_extents; u64 new_size; /* @@ -1617,13 +1533,10 @@ static void btrfs_split_extent_hook(struct inode *inode, * applies here, just in reverse. */ new_size = orig->end - split + 1; - num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); + num_extents = count_max_extents(new_size); new_size = split - orig->start; - num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); - if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) >= num_extents) + num_extents += count_max_extents(new_size); + if (count_max_extents(size) >= num_extents) return; } @@ -1643,7 +1556,7 @@ static void btrfs_merge_extent_hook(struct inode *inode, struct extent_state *other) { u64 new_size, old_size; - u64 num_extents; + u32 num_extents; /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) @@ -1681,14 +1594,10 @@ static void btrfs_merge_extent_hook(struct inode *inode, * this case. */ old_size = other->end - other->start + 1; - num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); + num_extents = count_max_extents(old_size); old_size = new->end - new->start + 1; - num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); - - if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) >= num_extents) + num_extents += count_max_extents(old_size); + if (count_max_extents(new_size) >= num_extents) return; spin_lock(&BTRFS_I(inode)->lock); @@ -1797,8 +1706,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 len = state->end + 1 - state->start; - u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1, - BTRFS_MAX_EXTENT_SIZE); + u32 num_extents = count_max_extents(len); spin_lock(&BTRFS_I(inode)->lock); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) @@ -1997,8 +1905,7 @@ out: * at IO completion time based on sums calculated at bio submission time. */ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, - struct inode *inode, u64 file_offset, - struct list_head *list) + struct inode *inode, struct list_head *list) { struct btrfs_ordered_sum *sum; @@ -2161,7 +2068,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, goto out; if (!extent_inserted) { - ins.objectid = btrfs_ino(inode); + ins.objectid = btrfs_ino(BTRFS_I(inode)); ins.offset = file_pos; ins.type = BTRFS_EXTENT_DATA_KEY; @@ -2194,8 +2101,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, ins.offset = disk_num_bytes; ins.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, - btrfs_ino(inode), file_pos, - ram_bytes, &ins); + btrfs_ino(BTRFS_I(inode)), file_pos, ram_bytes, &ins); /* * Release the reserved range from inode dirty range map, as it is * already moved into delayed_ref_head @@ -2320,7 +2226,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, u64 num_bytes; if (BTRFS_I(inode)->root->root_key.objectid == root_id && - inum == btrfs_ino(inode)) + inum == btrfs_ino(BTRFS_I(inode))) return 0; key.objectid = root_id; @@ -2589,7 +2495,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, if (ret) goto out_free_path; again: - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = start; @@ -2768,7 +2674,7 @@ record_old_file_extents(struct inode *inode, if (!path) goto out_kfree; - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = new->file_pos; @@ -2803,7 +2709,7 @@ record_old_file_extents(struct inode *inode, btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid != btrfs_ino(inode)) + if (key.objectid != btrfs_ino(BTRFS_I(inode))) break; if (key.type != BTRFS_EXTENT_DATA_KEY) break; @@ -2993,8 +2899,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out_unlock; } - add_pending_csums(trans, inode, ordered_extent->file_offset, - &ordered_extent->list); + add_pending_csums(trans, inode, &ordered_extent->list); btrfs_ordered_update_i_size(inode, 0, ordered_extent); ret = btrfs_update_inode_fallback(trans, root, inode); @@ -3123,9 +3028,8 @@ static int __readpage_endio_check(struct inode *inode, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_warn_rl(BTRFS_I(inode)->root->fs_info, - "csum failed ino %llu off %llu csum %u expected csum %u", - btrfs_ino(inode), start, csum, csum_expected); + btrfs_print_data_csum_error(inode, start, csum, csum_expected, + io_bio->mirror_num); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); kunmap_atomic(kaddr); @@ -3326,7 +3230,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { - ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); + ret = btrfs_insert_orphan_item(trans, root, + btrfs_ino(BTRFS_I(inode))); if (ret) { atomic_dec(&root->orphan_inodes); if (reserve) { @@ -3382,7 +3287,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, atomic_dec(&root->orphan_inodes); if (trans) ret = btrfs_del_orphan_item(trans, root, - btrfs_ino(inode)); + btrfs_ino(BTRFS_I(inode))); } if (release_rsv) @@ -3789,7 +3694,7 @@ cache_index: goto cache_acl; btrfs_item_key_to_cpu(leaf, &location, path->slots[0]); - if (location.objectid != btrfs_ino(inode)) + if (location.objectid != btrfs_ino(BTRFS_I(inode))) goto cache_acl; ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); @@ -3811,14 +3716,14 @@ cache_acl: * any xattrs or acls */ maybe_acls = acls_after_inode_item(leaf, path->slots[0], - btrfs_ino(inode), &first_xattr_slot); + btrfs_ino(BTRFS_I(inode)), &first_xattr_slot); if (first_xattr_slot != -1) { path->slots[0] = first_xattr_slot; ret = btrfs_load_inode_props(inode, path); if (ret) btrfs_err(fs_info, "error loading props for ino %llu (root %llu): %d", - btrfs_ino(inode), + btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret); } btrfs_free_path(path); @@ -3993,7 +3898,8 @@ noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, */ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, + struct btrfs_inode *inode, const char *name, int name_len) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4040,10 +3946,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, * that we delay to delete it, and just do this deletion when * we update the inode item. */ - if (BTRFS_I(inode)->dir_index) { + if (inode->dir_index) { ret = btrfs_delayed_delete_inode_ref(inode); if (!ret) { - index = BTRFS_I(inode)->dir_index; + index = inode->dir_index; goto skip_backref; } } @@ -4064,15 +3970,15 @@ skip_backref: goto err; } - ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, - inode, dir_ino); + ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, + dir_ino); if (ret != 0 && ret != -ENOENT) { btrfs_abort_transaction(trans, ret); goto err; } - ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, - dir, index); + ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, + index); if (ret == -ENOENT) ret = 0; else if (ret) @@ -4082,26 +3988,27 @@ err: if (ret) goto out; - btrfs_i_size_write(dir, dir->i_size - name_len * 2); - inode_inc_iversion(inode); - inode_inc_iversion(dir); - inode->i_ctime = dir->i_mtime = - dir->i_ctime = current_time(inode); - ret = btrfs_update_inode(trans, root, dir); + btrfs_i_size_write(&dir->vfs_inode, + dir->vfs_inode.i_size - name_len * 2); + inode_inc_iversion(&inode->vfs_inode); + inode_inc_iversion(&dir->vfs_inode); + inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime = + dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode); + ret = btrfs_update_inode(trans, root, &dir->vfs_inode); out: return ret; } int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len) { int ret; ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) { - drop_nlink(inode); - ret = btrfs_update_inode(trans, root, inode); + drop_nlink(&inode->vfs_inode); + ret = btrfs_update_inode(trans, root, &inode->vfs_inode); } return ret; } @@ -4139,10 +4046,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0); + btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + 0); - ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), - dentry->d_name.name, dentry->d_name.len); + ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), + BTRFS_I(d_inode(dentry)), dentry->d_name.name, + dentry->d_name.len); if (ret) goto out; @@ -4170,7 +4079,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_key key; u64 index; int ret; - u64 dir_ino = btrfs_ino(dir); + u64 dir_ino = btrfs_ino(BTRFS_I(dir)); path = btrfs_alloc_path(); if (!path) @@ -4222,7 +4131,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index); + ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -4249,14 +4158,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) + if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) return -EPERM; trans = __unlink_start_trans(dir); if (IS_ERR(trans)) return PTR_ERR(trans); - if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, root, dir, BTRFS_I(inode)->location.objectid, dentry->d_name.name, @@ -4271,8 +4180,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; /* now the directory is empty */ - err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), - dentry->d_name.name, dentry->d_name.len); + err = btrfs_unlink_inode(trans, root, BTRFS_I(dir), + BTRFS_I(d_inode(dentry)), dentry->d_name.name, + dentry->d_name.len); if (!err) { btrfs_i_size_write(inode, 0); /* @@ -4398,7 +4308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int extent_type = -1; int ret; int err = 0; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); u64 bytes_deleted = 0; bool be_nice = 0; bool should_throttle = 0; @@ -4437,7 +4347,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * items. */ if (min_type == 0 && root == BTRFS_I(inode)->root) - btrfs_kill_delayed_inode_items(inode); + btrfs_kill_delayed_inode_items(BTRFS_I(inode)); key.objectid = ino; key.offset = (u64)-1; @@ -4702,6 +4612,13 @@ error: btrfs_free_path(path); + if (err == 0) { + /* only inline file may have last_size != new_size */ + if (new_size >= fs_info->sectorsize || + new_size > fs_info->max_inline) + ASSERT(last_size == new_size); + } + if (be_nice && bytes_deleted > SZ_32M) { unsigned long updates = trans->delayed_ref_updates; if (updates) { @@ -4870,8 +4787,8 @@ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode, return ret; } - ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, - 0, 0, len, 0, len, 0, 0, 0); + ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)), + offset, 0, 0, len, 0, len, 0, 0, 0); if (ret) btrfs_abort_transaction(trans, ret); else @@ -5087,6 +5004,13 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) if (ret && inode->i_nlink) { int err; + /* To get a stable disk_i_size */ + err = btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (err) { + btrfs_orphan_del(NULL, inode); + return err; + } + /* * failed to truncate, disk_i_size is only adjusted down * as we remove extents, so it should represent the true @@ -5282,7 +5206,7 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } - ret = btrfs_commit_inode_delayed_inode(inode); + ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode)); if (ret) { btrfs_orphan_del(NULL, inode); goto no_delete; @@ -5402,12 +5326,12 @@ void btrfs_evict_inode(struct inode *inode) trans->block_rsv = &fs_info->trans_block_rsv; if (!(root == fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) - btrfs_return_ino(root, btrfs_ino(inode)); + btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode))); btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); no_delete: - btrfs_remove_delayed_node(inode); + btrfs_remove_delayed_node(BTRFS_I(inode)); clear_inode(inode); } @@ -5429,8 +5353,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, if (!path) return -ENOMEM; - di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, - namelen, 0); + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), + name, namelen, 0); if (IS_ERR(di)) ret = PTR_ERR(di); @@ -5485,7 +5409,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); - if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) || + if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) || btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) goto out; @@ -5520,7 +5444,7 @@ static void inode_tree_add(struct inode *inode) struct rb_node **p; struct rb_node *parent; struct rb_node *new = &BTRFS_I(inode)->rb_node; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); if (inode_unhashed(inode)) return; @@ -5531,9 +5455,9 @@ static void inode_tree_add(struct inode *inode) parent = *p; entry = rb_entry(parent, struct btrfs_inode, rb_node); - if (ino < btrfs_ino(&entry->vfs_inode)) + if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode))) p = &parent->rb_left; - else if (ino > btrfs_ino(&entry->vfs_inode)) + else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode))) p = &parent->rb_right; else { WARN_ON(!(entry->vfs_inode.i_state & @@ -5593,9 +5517,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < btrfs_ino(&entry->vfs_inode)) + if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode))) node = node->rb_left; - else if (objectid > btrfs_ino(&entry->vfs_inode)) + else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode))) node = node->rb_right; else break; @@ -5603,7 +5527,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= btrfs_ino(&entry->vfs_inode)) { + if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) { node = prev; break; } @@ -5612,7 +5536,7 @@ again: } while (node) { entry = rb_entry(node, struct btrfs_inode, rb_node); - objectid = btrfs_ino(&entry->vfs_inode) + 1; + objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1; inode = igrab(&entry->vfs_inode); if (inode) { spin_unlock(&root->inode_lock); @@ -5796,7 +5720,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry) if (btrfs_root_refs(&root->root_item) == 0) return 1; - if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return 1; } return 0; @@ -5865,7 +5789,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) key.type = BTRFS_DIR_INDEX_KEY; key.offset = ctx->pos; - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -6062,7 +5986,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) struct extent_buffer *leaf; int ret; - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.type = BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; @@ -6094,7 +6018,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != btrfs_ino(inode) || + if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) || found_key.type != BTRFS_DIR_INDEX_KEY) { BTRFS_I(inode)->index_cnt = 2; goto out; @@ -6115,7 +6039,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) int ret = 0; if (BTRFS_I(dir)->index_cnt == (u64)-1) { - ret = btrfs_inode_delayed_dir_index_count(dir); + ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir)); if (ret) { ret = btrfs_set_inode_index_count(dir); if (ret) @@ -6294,7 +6218,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (ret) btrfs_err(fs_info, "error inheriting props for ino %llu (root %llu): %d", - btrfs_ino(inode), root->root_key.objectid, ret); + btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret); return inode; @@ -6327,8 +6251,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(parent_inode)->root; - u64 ino = btrfs_ino(inode); - u64 parent_ino = btrfs_ino(parent_inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); + u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode)); if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); @@ -6427,8 +6351,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, btrfs_ino(dir), objectid, - mode, &index); + dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid, + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -6499,8 +6423,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, btrfs_ino(dir), objectid, - mode, &index); + dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid, + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -6609,7 +6533,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } d_instantiate(dentry, inode); - btrfs_log_new_name(trans, inode, NULL, parent); + btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); } btrfs_balance_delayed_items(fs_info); @@ -6649,8 +6573,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, btrfs_ino(dir), objectid, - S_IFDIR | mode, &index); + dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid, + S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -6810,7 +6734,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int err = 0; u64 extent_start = 0; u64 extent_end = 0; - u64 objectid = btrfs_ino(inode); + u64 objectid = btrfs_ino(BTRFS_I(inode)); u32 found_type; struct btrfs_path *path = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -7068,7 +6992,7 @@ insert: write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, inode, em); + trace_btrfs_get_extent(root, BTRFS_I(inode), em); btrfs_free_path(path); if (trans) { @@ -7225,9 +7149,11 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, int ret; if (type != BTRFS_ORDERED_NOCOW) { - em = create_pinned_em(inode, start, len, orig_start, - block_start, block_len, orig_block_len, - ram_bytes, type); + em = create_io_em(inode, start, len, orig_start, + block_start, block_len, orig_block_len, + ram_bytes, + BTRFS_COMPRESS_NONE, /* compress_type */ + type); if (IS_ERR(em)) goto out; } @@ -7264,7 +7190,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, em = btrfs_create_dio_extent(inode, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, - ins.offset, 0); + ins.offset, BTRFS_ORDERED_REGULAR); btrfs_dec_block_group_reservations(fs_info, ins.objectid); if (IS_ERR(em)) btrfs_free_reserved_extent(fs_info, ins.objectid, @@ -7282,7 +7208,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *ram_bytes) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_trans_handle *trans; struct btrfs_path *path; int ret; struct extent_buffer *leaf; @@ -7302,8 +7227,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, if (!path) return -ENOMEM; - ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), - offset, 0); + ret = btrfs_lookup_file_extent(NULL, root, path, + btrfs_ino(BTRFS_I(inode)), offset, 0); if (ret < 0) goto out; @@ -7319,7 +7244,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, ret = 0; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != btrfs_ino(inode) || + if (key.objectid != btrfs_ino(BTRFS_I(inode)) || key.type != BTRFS_EXTENT_DATA_KEY) { /* not our file or wrong item type, must cow */ goto out; @@ -7385,15 +7310,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, * look for other files referencing this extent, if we * find any we must cow */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = 0; - goto out; - } - ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), + ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)), key.offset - backref_offset, disk_bytenr); - btrfs_end_transaction(trans); if (ret) { ret = 0; goto out; @@ -7570,17 +7489,23 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, return ret; } -static struct extent_map *create_pinned_em(struct inode *inode, u64 start, - u64 len, u64 orig_start, - u64 block_start, u64 block_len, - u64 orig_block_len, u64 ram_bytes, - int type) +/* The callers of this must take lock_extent() */ +static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, + u64 orig_start, u64 block_start, + u64 block_len, u64 orig_block_len, + u64 ram_bytes, int compress_type, + int type) { struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; + ASSERT(type == BTRFS_ORDERED_PREALLOC || + type == BTRFS_ORDERED_COMPRESSED || + type == BTRFS_ORDERED_NOCOW || + type == BTRFS_ORDERED_REGULAR); + em_tree = &BTRFS_I(inode)->extent_tree; em = alloc_extent_map(); if (!em) @@ -7588,8 +7513,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, em->start = start; em->orig_start = orig_start; - em->mod_start = start; - em->mod_len = len; em->len = len; em->block_len = block_len; em->block_start = block_start; @@ -7598,8 +7521,12 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, em->ram_bytes = ram_bytes; em->generation = -1; set_bit(EXTENT_FLAG_PINNED, &em->flags); - if (type == BTRFS_ORDERED_PREALLOC) + if (type == BTRFS_ORDERED_PREALLOC) { set_bit(EXTENT_FLAG_FILLING, &em->flags); + } else if (type == BTRFS_ORDERED_COMPRESSED) { + set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->compress_type = compress_type; + } do { btrfs_drop_extent_cache(inode, em->start, @@ -7607,6 +7534,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); + /* + * The caller has taken lock_extent(), who could race with us + * to add em? + */ } while (ret == -EEXIST); if (ret) { @@ -7614,6 +7545,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, return ERR_PTR(ret); } + /* em got 2 refs now, callers needs to do free_extent_map once. */ return em; } @@ -7621,10 +7553,8 @@ static void adjust_dio_outstanding_extents(struct inode *inode, struct btrfs_dio_data *dio_data, const u64 len) { - unsigned num_extents; + unsigned num_extents = count_max_extents(len); - num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); /* * If we have an outstanding_extents count still set then we're * within our reservation, otherwise we need to adjust our inode @@ -7804,7 +7734,7 @@ unlock: * Need to update the i_size under the extent lock so buffered * readers will get the updated i_size when we unlock. */ - if (start + len > i_size_read(inode)) + if (!dio_data->overwrite && start + len > i_size_read(inode)) i_size_write(inode, start + len); adjust_dio_outstanding_extents(inode, dio_data, len); @@ -8254,7 +8184,8 @@ static void btrfs_end_dio_bio(struct bio *bio) if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d", - btrfs_ino(dip->inode), bio_op(bio), bio->bi_opf, + btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio), + bio->bi_opf, (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); @@ -8679,15 +8610,14 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) * not unlock the i_mutex at this case. */ if (offset + count <= inode->i_size) { + dio_data.overwrite = 1; inode_unlock(inode); relock = true; } ret = btrfs_delalloc_reserve_space(inode, offset, count); if (ret) goto out; - dio_data.outstanding_extents = div64_u64(count + - BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE); + dio_data.outstanding_extents = count_max_extents(count); /* * We need to know how many extents we reserved so that we can @@ -8831,7 +8761,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) { if (PageWriteback(page) || PageDirty(page)) return 0; - return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); + return __btrfs_releasepage(page, gfp_flags); } static void btrfs_invalidatepage(struct page *page, unsigned int offset, @@ -8964,10 +8894,10 @@ again: * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int btrfs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; @@ -9000,7 +8930,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, page_start, reserved_space); if (!ret) { - ret = file_update_time(vma->vm_file); + ret = file_update_time(vmf->vma->vm_file); reserved = 1; } if (ret) { @@ -9032,7 +8962,7 @@ again: * we can't set the delalloc bits if there are pending ordered * extents. Drop our locks and wait for them to finish */ - ordered = btrfs_lookup_ordered_range(inode, page_start, page_end); + ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -9056,11 +8986,11 @@ again: } /* - * XXX - page_mkwrite gets called every time the page is dirtied, even - * if it was already dirty, so for space accounting reasons we need to - * clear any delalloc bits for the range we are fixing to save. There - * is probably a better way to do this, but for now keep consistent with - * prepare_pages in the normal write path. + * page_mkwrite gets called when the page is firstly dirtied after it's + * faulted in, but write(2) could also dirty a page and set delalloc + * bits, thus in this case for space account reason, we still need to + * clear any delalloc bits within this page range since we have to + * reserve data&meta space before lock_page() (see above comments). */ clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, EXTENT_DIRTY | EXTENT_DELALLOC | @@ -9384,7 +9314,7 @@ void btrfs_destroy_inode(struct inode *inode) if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)) { btrfs_info(fs_info, "inode %llu still on the orphan list", - btrfs_ino(inode)); + btrfs_ino(BTRFS_I(inode))); atomic_dec(&root->orphan_inodes); } @@ -9513,8 +9443,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = current_time(old_inode); struct dentry *parent; - u64 old_ino = btrfs_ino(old_inode); - u64 new_ino = btrfs_ino(new_inode); + u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); + u64 new_ino = btrfs_ino(BTRFS_I(new_inode)); u64 old_idx = 0; u64 new_idx = 0; u64 root_objectid; @@ -9571,7 +9501,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_dentry->d_name.name, new_dentry->d_name.len, old_ino, - btrfs_ino(new_dir), old_idx); + btrfs_ino(BTRFS_I(new_dir)), + old_idx); if (ret) goto out_fail; } @@ -9587,7 +9518,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_dentry->d_name.name, old_dentry->d_name.len, new_ino, - btrfs_ino(old_dir), new_idx); + btrfs_ino(BTRFS_I(old_dir)), + new_idx); if (ret) goto out_fail; } @@ -9603,8 +9535,10 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_inode->i_ctime = ctime; if (old_dentry->d_parent != new_dentry->d_parent) { - btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - btrfs_record_unlink_dir(trans, new_dir, new_inode, 1); + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), + BTRFS_I(old_inode), 1); + btrfs_record_unlink_dir(trans, BTRFS_I(new_dir), + BTRFS_I(new_inode), 1); } /* src is a subvolume */ @@ -9615,8 +9549,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_dentry->d_name.name, old_dentry->d_name.len); } else { /* src is an inode */ - ret = __btrfs_unlink_inode(trans, root, old_dir, - old_dentry->d_inode, + ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), + BTRFS_I(old_dentry->d_inode), old_dentry->d_name.name, old_dentry->d_name.len); if (!ret) @@ -9635,8 +9569,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_dentry->d_name.name, new_dentry->d_name.len); } else { /* dest is an inode */ - ret = __btrfs_unlink_inode(trans, dest, new_dir, - new_dentry->d_inode, + ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), + BTRFS_I(new_dentry->d_inode), new_dentry->d_name.name, new_dentry->d_name.len); if (!ret) @@ -9670,13 +9604,15 @@ static int btrfs_rename_exchange(struct inode *old_dir, if (root_log_pinned) { parent = new_dentry->d_parent; - btrfs_log_new_name(trans, old_inode, old_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), + parent); btrfs_end_log_trans(root); root_log_pinned = false; } if (dest_log_pinned) { parent = old_dentry->d_parent; - btrfs_log_new_name(trans, new_inode, new_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), + parent); btrfs_end_log_trans(dest); dest_log_pinned = false; } @@ -9693,11 +9629,11 @@ out_fail: * allow the tasks to sync it. */ if (ret && (root_log_pinned || dest_log_pinned)) { - if (btrfs_inode_in_log(old_dir, fs_info->generation) || - btrfs_inode_in_log(new_dir, fs_info->generation) || - btrfs_inode_in_log(old_inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && - btrfs_inode_in_log(new_inode, fs_info->generation))) + btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) btrfs_set_log_full_commit(fs_info, trans); if (root_log_pinned) { @@ -9736,7 +9672,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, - btrfs_ino(dir), + btrfs_ino(BTRFS_I(dir)), objectid, S_IFCHR | WHITEOUT_MODE, &index); @@ -9784,10 +9720,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, u64 index = 0; u64 root_objectid; int ret; - u64 old_ino = btrfs_ino(old_inode); + u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); bool log_pinned = false; - if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; /* we only allow rename subvolume link between subvolumes */ @@ -9795,7 +9731,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, return -EXDEV; if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || - (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID)) + (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID)) return -ENOTEMPTY; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -9870,7 +9806,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len, old_ino, - btrfs_ino(new_dir), index); + btrfs_ino(BTRFS_I(new_dir)), index); if (ret) goto out_fail; } @@ -9883,7 +9819,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = current_time(old_dir); if (old_dentry->d_parent != new_dentry->d_parent) - btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); + btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), + BTRFS_I(old_inode), 1); if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; @@ -9891,8 +9828,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_name.name, old_dentry->d_name.len); } else { - ret = __btrfs_unlink_inode(trans, root, old_dir, - d_inode(old_dentry), + ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), + BTRFS_I(d_inode(old_dentry)), old_dentry->d_name.name, old_dentry->d_name.len); if (!ret) @@ -9906,7 +9843,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode) { inode_inc_iversion(new_inode); new_inode->i_ctime = current_time(new_inode); - if (unlikely(btrfs_ino(new_inode) == + if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; ret = btrfs_unlink_subvol(trans, dest, new_dir, @@ -9915,8 +9852,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.len); BUG_ON(new_inode->i_nlink == 0); } else { - ret = btrfs_unlink_inode(trans, dest, new_dir, - d_inode(new_dentry), + ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), + BTRFS_I(d_inode(new_dentry)), new_dentry->d_name.name, new_dentry->d_name.len); } @@ -9942,7 +9879,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (log_pinned) { struct dentry *parent = new_dentry->d_parent; - btrfs_log_new_name(trans, old_inode, old_dir, parent); + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), + parent); btrfs_end_log_trans(root); log_pinned = false; } @@ -9969,11 +9907,11 @@ out_fail: * allow the tasks to sync it. */ if (ret && log_pinned) { - if (btrfs_inode_in_log(old_dir, fs_info->generation) || - btrfs_inode_in_log(new_dir, fs_info->generation) || - btrfs_inode_in_log(old_inode, fs_info->generation) || + if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) || + btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || (new_inode && - btrfs_inode_in_log(new_inode, fs_info->generation))) + btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) btrfs_set_log_full_commit(fs_info, trans); btrfs_end_log_trans(root); @@ -10237,8 +10175,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, btrfs_ino(dir), objectid, - S_IFLNK|S_IRWXUGO, &index); + dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), + objectid, S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -10264,7 +10202,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = -ENOMEM; goto out_unlock_inode; } - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.offset = 0; key.type = BTRFS_EXTENT_DATA_KEY; datasize = btrfs_file_extent_calc_inline_size(name_len); @@ -10517,7 +10455,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; inode = btrfs_new_inode(trans, root, dir, NULL, 0, - btrfs_ino(dir), objectid, mode, &index); + btrfs_ino(BTRFS_I(dir)), objectid, mode, &index); if (IS_ERR(inode)) { ret = PTR_ERR(inode); inode = NULL; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 21e51b0ba188..d8539979b44f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -395,7 +395,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) q = bdev_get_queue(device->bdev); if (blk_queue_discard(q)) { num_devices++; - minlen = min((u64)q->limits.discard_granularity, + minlen = min_t(u64, q->limits.discard_granularity, minlen); } } @@ -487,8 +487,7 @@ static noinline int create_subvol(struct inode *dir, trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_subvolume_release_metadata(fs_info, &block_rsv, - qgroup_reserved); + btrfs_subvolume_release_metadata(fs_info, &block_rsv); goto fail_free; } trans->block_rsv = &block_rsv; @@ -601,7 +600,7 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_add_root_ref(trans, fs_info, objectid, root->root_key.objectid, - btrfs_ino(dir), index, name, namelen); + btrfs_ino(BTRFS_I(dir)), index, name, namelen); BUG_ON(ret); ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid, @@ -613,7 +612,7 @@ fail: kfree(root_item); trans->block_rsv = NULL; trans->bytes_reserved = 0; - btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved); + btrfs_subvolume_release_metadata(fs_info, &block_rsv); if (async_transid) { *async_transid = trans->transid; @@ -657,7 +656,7 @@ static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) } static int create_snapshot(struct btrfs_root *root, struct inode *dir, - struct dentry *dentry, char *name, int namelen, + struct dentry *dentry, u64 *async_transid, bool readonly, struct btrfs_qgroup_inherit *inherit) { @@ -670,12 +669,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return -EINVAL; - pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); + pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL); if (!pending_snapshot) return -ENOMEM; pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), - GFP_NOFS); + GFP_KERNEL); pending_snapshot->path = btrfs_alloc_path(); if (!pending_snapshot->root_item || !pending_snapshot->path) { ret = -ENOMEM; @@ -753,9 +752,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, d_instantiate(dentry, inode); ret = 0; fail: - btrfs_subvolume_release_metadata(fs_info, - &pending_snapshot->block_rsv, - pending_snapshot->qgroup_reserved); + btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); dec_and_free: if (atomic_dec_and_test(&root->will_be_snapshoted)) wake_up_atomic_t(&root->will_be_snapshoted); @@ -874,7 +871,7 @@ static noinline int btrfs_mksubvol(const struct path *parent, goto out_up_read; if (snap_src) { - error = create_snapshot(snap_src, dir, dentry, name, namelen, + error = create_snapshot(snap_src, dir, dentry, async_transid, readonly, inherit); } else { error = create_subvol(dir, dentry, name, namelen, @@ -941,7 +938,7 @@ static int find_new_extents(struct btrfs_root *root, struct btrfs_file_extent_item *extent; int type; int ret; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) @@ -1780,7 +1777,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file, int ret = 0; u64 flags = 0; - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) + if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) return -EINVAL; down_read(&fs_info->subvol_sem); @@ -1812,7 +1809,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, if (ret) goto out; - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { + if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out_drop_write; } @@ -2446,7 +2443,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, if (err) goto out_dput; - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { + if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { err = -EINVAL; goto out_dput; } @@ -2497,7 +2494,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; - btrfs_record_snapshot_destroy(trans, dir); + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, @@ -2555,7 +2552,7 @@ out_end_trans: err = ret; inode->i_flags |= S_DEAD; out_release: - btrfs_subvolume_release_metadata(fs_info, &block_rsv, qgroup_reserved); + btrfs_subvolume_release_metadata(fs_info, &block_rsv); out_up_write: up_write(&fs_info->subvol_sem); if (err) { @@ -2613,9 +2610,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) goto out; } ret = btrfs_defrag_root(root); - if (ret) - goto out; - ret = btrfs_defrag_root(root->fs_info->extent_root); break; case S_IFREG: if (!(file->f_mode & FMODE_WRITE)) { @@ -3047,11 +3041,21 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, cmp->src_pages = src_pgarr; cmp->dst_pages = dst_pgarr; - ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + /* + * If deduping ranges in the same inode, locking rules make it mandatory + * to always lock pages in ascending order to avoid deadlocks with + * concurrent tasks (such as starting writeback/delalloc). + */ + if (src == dst && dst_loff < loff) { + swap(src_pgarr, dst_pgarr); + swap(loff, dst_loff); + } + + ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff); if (ret) goto out; - ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff); out: if (ret) @@ -3059,8 +3063,7 @@ out: return 0; } -static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len, struct cmp_pages *cmp) +static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) { int ret = 0; int i; @@ -3128,26 +3131,27 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, int ret; u64 len = olen; struct cmp_pages cmp; - int same_inode = 0; + bool same_inode = (src == dst); u64 same_lock_start = 0; u64 same_lock_len = 0; - if (src == dst) - same_inode = 1; - if (len == 0) return 0; - if (same_inode) { + if (same_inode) inode_lock(src); + else + btrfs_double_inode_lock(src, dst); - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; - ret = extent_same_check_offsets(src, dst_loff, &len, olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); + if (ret) + goto out_unlock; + if (same_inode) { /* * Single inode case wants the same checks, except we * don't want our length pushed out past i_size as @@ -3175,16 +3179,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, same_lock_start = min_t(u64, loff, dst_loff); same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; - } else { - btrfs_double_inode_lock(src, dst); - - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; - - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); - if (ret) - goto out_unlock; } /* don't make the dst file partly checksummed */ @@ -3236,7 +3230,7 @@ again: } /* pass original length for comparison so we stay within i_size */ - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); + ret = btrfs_cmp_data(olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); @@ -3399,8 +3393,7 @@ static void clone_update_extent_map(struct inode *inode, * data into the destination inode's inline extent if the later is greater then * the former. */ -static int clone_copy_inline_extent(struct inode *src, - struct inode *dst, +static int clone_copy_inline_extent(struct inode *dst, struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *new_key, @@ -3420,7 +3413,7 @@ static int clone_copy_inline_extent(struct inode *src, if (new_key->offset > 0) return -EOPNOTSUPP; - key.objectid = btrfs_ino(dst); + key.objectid = btrfs_ino(BTRFS_I(dst)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -3435,7 +3428,7 @@ static int clone_copy_inline_extent(struct inode *src, goto copy_inline_extent; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == btrfs_ino(dst) && + if (key.objectid == btrfs_ino(BTRFS_I(dst)) && key.type == BTRFS_EXTENT_DATA_KEY) { ASSERT(key.offset > 0); return -EOPNOTSUPP; @@ -3469,7 +3462,7 @@ static int clone_copy_inline_extent(struct inode *src, } else if (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == btrfs_ino(dst) && + if (key.objectid == btrfs_ino(BTRFS_I(dst)) && key.type == BTRFS_EXTENT_DATA_KEY) return -EOPNOTSUPP; } @@ -3563,7 +3556,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, path->reada = READA_FORWARD; /* clone data */ - key.objectid = btrfs_ino(src); + key.objectid = btrfs_ino(BTRFS_I(src)); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = off; @@ -3606,7 +3599,7 @@ process_slot: btrfs_item_key_to_cpu(leaf, &key, slot); if (key.type > BTRFS_EXTENT_DATA_KEY || - key.objectid != btrfs_ino(src)) + key.objectid != btrfs_ino(BTRFS_I(src))) break; if (key.type == BTRFS_EXTENT_DATA_KEY) { @@ -3659,7 +3652,7 @@ process_slot: path->leave_spinning = 0; memcpy(&new_key, &key, sizeof(new_key)); - new_key.objectid = btrfs_ino(inode); + new_key.objectid = btrfs_ino(BTRFS_I(inode)); if (off <= key.offset) new_key.offset = key.offset + destoff - off; else @@ -3749,7 +3742,7 @@ process_slot: fs_info, disko, diskl, 0, root->root_key.objectid, - btrfs_ino(inode), + btrfs_ino(BTRFS_I(inode)), new_key.offset - datao); if (ret) { btrfs_abort_transaction(trans, @@ -3779,7 +3772,7 @@ process_slot: size -= skip + trim; datal -= skip + trim; - ret = clone_copy_inline_extent(src, inode, + ret = clone_copy_inline_extent(inode, trans, path, &new_key, drop_start, @@ -5129,7 +5122,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, down_write(&fs_info->subvol_sem); - if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { + if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { ret = -EINVAL; goto out; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 041c3326d109..bc2aba810629 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -432,7 +432,7 @@ out: } /* Needs to either be called under a log transaction or the log_mutex */ -void btrfs_get_logged_extents(struct inode *inode, +void btrfs_get_logged_extents(struct btrfs_inode *inode, struct list_head *logged_list, const loff_t start, const loff_t end) @@ -442,7 +442,7 @@ void btrfs_get_logged_extents(struct inode *inode, struct rb_node *n; struct rb_node *prev; - tree = &BTRFS_I(inode)->ordered_tree; + tree = &inode->ordered_tree; spin_lock_irq(&tree->lock); n = __tree_search(&tree->tree, end, &prev); if (!n) @@ -984,8 +984,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, } disk_i_size = BTRFS_I(inode)->disk_i_size; - /* truncate file */ - if (disk_i_size > i_size) { + /* + * truncate file. + * If ordered is not NULL, then this is called from endio and + * disk_i_size will be updated by either truncate itself or any + * in-flight IOs which are inside the disk_i_size. + * + * Because btrfs_setsize() may set i_size with disk_i_size if truncate + * fails somehow, we need to make sure we have a precise disk_i_size by + * updating it as usual. + * + */ + if (!ordered && disk_i_size > i_size) { BTRFS_I(inode)->disk_i_size = orig_offset; ret = 0; goto out; @@ -1032,25 +1042,22 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, /* We treat this entry as if it doesn't exist */ if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) continue; - if (test->file_offset + test->len <= disk_i_size) + + if (entry_end(test) <= disk_i_size) break; if (test->file_offset >= i_size) break; - if (entry_end(test) > disk_i_size) { - /* - * we don't update disk_i_size now, so record this - * undealt i_size. Or we will not know the real - * i_size. - */ - if (test->outstanding_isize < offset) - test->outstanding_isize = offset; - if (ordered && - ordered->outstanding_isize > - test->outstanding_isize) - test->outstanding_isize = - ordered->outstanding_isize; - goto out; - } + + /* + * We don't update disk_i_size now, so record this undealt + * i_size. Or we will not know the real i_size. + */ + if (test->outstanding_isize < offset) + test->outstanding_isize = offset; + if (ordered && + ordered->outstanding_isize > test->outstanding_isize) + test->outstanding_isize = ordered->outstanding_isize; + goto out; } new_i_size = min_t(u64, offset, i_size); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 5f2b0ca28705..a8cb8efe6fae 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -75,6 +75,8 @@ struct btrfs_ordered_sum { * in the logging code. */ #define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to * complete in the current transaction. */ +#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -201,7 +203,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, const u64 range_start, const u64 range_len); int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, const u64 range_start, const u64 range_len); -void btrfs_get_logged_extents(struct inode *inode, +void btrfs_get_logged_extents(struct btrfs_inode *inode, struct list_head *logged_list, const loff_t start, const loff_t end); diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index f2621e330954..d6cb155ef7a1 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -279,7 +279,7 @@ static void inode_prop_iterator(void *ctx, if (unlikely(ret)) btrfs_warn(root->fs_info, "error applying prop %s to ino %llu (root %llu): %d", - handler->xattr_name, btrfs_ino(inode), + handler->xattr_name, btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret); else set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags); @@ -288,7 +288,7 @@ static void inode_prop_iterator(void *ctx, int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); int ret; ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 662821f1252c..a5da750c1087 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -319,7 +319,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) return 0; - fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); + fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); if (!fs_info->qgroup_ulist) { ret = -ENOMEM; goto out; @@ -876,7 +876,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, goto out; } - fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); + fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); if (!fs_info->qgroup_ulist) { ret = -ENOMEM; goto out; @@ -1019,7 +1019,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, list_del("a_root->dirty_list); btrfs_tree_lock(quota_root->node); - clean_tree_block(trans, fs_info, quota_root->node); + clean_tree_block(fs_info, quota_root->node); btrfs_tree_unlock(quota_root->node); btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); @@ -1038,6 +1038,15 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info, list_add(&qgroup->dirty, &fs_info->dirty_qgroups); } +static void report_reserved_underflow(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup, + u64 num_bytes) +{ + btrfs_warn(fs_info, + "qgroup %llu reserved space underflow, have: %llu, to free: %llu", + qgroup->qgroupid, qgroup->reserved, num_bytes); + qgroup->reserved = 0; +} /* * The easy accounting, if we are adding/removing the only ref for an extent * then this qgroup and all of the parent qgroups get their reference and @@ -1065,8 +1074,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, WARN_ON(sign < 0 && qgroup->excl < num_bytes); qgroup->excl += sign * num_bytes; qgroup->excl_cmpr += sign * num_bytes; - if (sign > 0) - qgroup->reserved -= num_bytes; + if (sign > 0) { + if (WARN_ON(qgroup->reserved < num_bytes)) + report_reserved_underflow(fs_info, qgroup, num_bytes); + else + qgroup->reserved -= num_bytes; + } qgroup_dirty(fs_info, qgroup); @@ -1086,8 +1099,13 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, qgroup->rfer_cmpr += sign * num_bytes; WARN_ON(sign < 0 && qgroup->excl < num_bytes); qgroup->excl += sign * num_bytes; - if (sign > 0) - qgroup->reserved -= num_bytes; + if (sign > 0) { + if (WARN_ON(qgroup->reserved < num_bytes)) + report_reserved_underflow(fs_info, qgroup, + num_bytes); + else + qgroup->reserved -= num_bytes; + } qgroup->excl_cmpr += sign * num_bytes; qgroup_dirty(fs_info, qgroup); @@ -1156,7 +1174,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) return -EINVAL; - tmp = ulist_alloc(GFP_NOFS); + tmp = ulist_alloc(GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1205,7 +1223,7 @@ out: return ret; } -int __del_qgroup_relation(struct btrfs_trans_handle *trans, +static int __del_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst) { struct btrfs_root *quota_root; @@ -1216,7 +1234,7 @@ int __del_qgroup_relation(struct btrfs_trans_handle *trans, int ret = 0; int err; - tmp = ulist_alloc(GFP_NOFS); + tmp = ulist_alloc(GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1446,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, while (node) { record = rb_entry(node, struct btrfs_qgroup_extent_record, node); - ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, - &record->old_roots); + if (WARN_ON(!record->old_roots)) + ret = btrfs_find_all_roots(NULL, fs_info, + record->bytenr, 0, &record->old_roots); if (ret < 0) break; if (qgroup_to_skip) @@ -1486,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } +int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_extent_record *qrecord) +{ + struct ulist *old_root; + u64 bytenr = qrecord->bytenr; + int ret; + + ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); + if (ret < 0) + return ret; + + /* + * Here we don't need to get the lock of + * trans->transaction->delayed_refs, since inserted qrecord won't + * be deleted, only qrecord->node may be modified (new qrecord insert) + * + * So modifying qrecord->old_roots is safe here + */ + qrecord->old_roots = old_root; + return 0; +} + int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, gfp_t gfp_flag) @@ -1511,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, spin_lock(&delayed_refs->lock); ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); spin_unlock(&delayed_refs->lock); - if (ret > 0) + if (ret > 0) { kfree(record); - return 0; + return 0; + } + return btrfs_qgroup_trace_extent_post(fs_info, record); } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, @@ -1571,8 +1614,7 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, * If we increment the root nodes slot counter past the number of * elements, 1 is returned to signal completion of the search. */ -static int adjust_slots_upwards(struct btrfs_root *root, - struct btrfs_path *path, int root_level) +static int adjust_slots_upwards(struct btrfs_path *path, int root_level) { int level = 0; int nr, slot; @@ -1713,7 +1755,7 @@ walk_down: goto out; /* Nonzero return here means we completed our search */ - ret = adjust_slots_upwards(root, path, root_level); + ret = adjust_slots_upwards(path, root_level); if (ret) break; @@ -1927,13 +1969,14 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 nr_old_roots = 0; int ret = 0; + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) + return 0; + if (new_roots) nr_new_roots = new_roots->nnodes; if (old_roots) nr_old_roots = old_roots->nnodes; - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) - goto out_free; BUG_ON(!fs_info->quota_root); trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, @@ -2170,9 +2213,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, goto out; } - rcu_read_lock(); level_size = fs_info->nodesize; - rcu_read_unlock(); } /* @@ -2306,7 +2347,20 @@ out: return ret; } -static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) +static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) +{ + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && + qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer) + return false; + + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && + qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl) + return false; + + return true; +} + +static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) { struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; @@ -2347,16 +2401,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) qg = unode_aux_to_qgroup(unode); - if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && - qg->reserved + (s64)qg->rfer + num_bytes > - qg->max_rfer) { - ret = -EDQUOT; - goto out; - } - - if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && - qg->reserved + (s64)qg->excl + num_bytes > - qg->max_excl) { + if (enforce && !qgroup_check_limits(qg, num_bytes)) { ret = -EDQUOT; goto out; } @@ -2424,7 +2469,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, qg = unode_aux_to_qgroup(unode); - qg->reserved -= num_bytes; + if (WARN_ON(qg->reserved < num_bytes)) + report_reserved_underflow(fs_info, qg, num_bytes); + else + qg->reserved -= num_bytes; list_for_each_entry(glist, &qg->groups, next_group) { ret = ulist_add(fs_info->qgroup_ulist, @@ -2439,11 +2487,6 @@ out: spin_unlock(&fs_info->qgroup_lock); } -static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes) -{ - return btrfs_qgroup_free_refroot(root->fs_info, root->objectid, - num_bytes); -} void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) { if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) @@ -2803,7 +2846,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) return 0; changeset.bytes_changed = 0; - changeset.range_changed = ulist_alloc(GFP_NOFS); + ulist_init(&changeset.range_changed); ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len -1, EXTENT_QGROUP_RESERVED, &changeset); trace_btrfs_qgroup_reserve_data(inode, start, len, @@ -2811,21 +2854,21 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) QGROUP_RESERVE); if (ret < 0) goto cleanup; - ret = qgroup_reserve(root, changeset.bytes_changed); + ret = qgroup_reserve(root, changeset.bytes_changed, true); if (ret < 0) goto cleanup; - ulist_free(changeset.range_changed); + ulist_release(&changeset.range_changed); return ret; cleanup: /* cleanup already reserved ranges */ ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(changeset.range_changed, &uiter))) + while ((unode = ulist_next(&changeset.range_changed, &uiter))) clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, GFP_NOFS); - ulist_free(changeset.range_changed); + ulist_release(&changeset.range_changed); return ret; } @@ -2837,23 +2880,22 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, int ret; changeset.bytes_changed = 0; - changeset.range_changed = ulist_alloc(GFP_NOFS); - if (!changeset.range_changed) - return -ENOMEM; - + ulist_init(&changeset.range_changed); ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len -1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) goto out; if (free) { - qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); + btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, + BTRFS_I(inode)->root->objectid, + changeset.bytes_changed); trace_op = QGROUP_FREE; } trace_btrfs_qgroup_release_data(inode, start, len, changeset.bytes_changed, trace_op); out: - ulist_free(changeset.range_changed); + ulist_release(&changeset.range_changed); return ret; } @@ -2892,7 +2934,8 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) return __btrfs_qgroup_release_data(inode, start, len, 0); } -int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + bool enforce) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -2902,7 +2945,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - ret = qgroup_reserve(root, num_bytes); + ret = qgroup_reserve(root, num_bytes, enforce); if (ret < 0) return ret; atomic_add(num_bytes, &root->qgroup_meta_rsv); @@ -2921,7 +2964,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root) reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); if (reserved == 0) return; - qgroup_free(root, reserved); + btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); } void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) @@ -2935,7 +2978,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); atomic_sub(num_bytes, &root->qgroup_meta_rsv); - qgroup_free(root, num_bytes); + btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); } /* @@ -2950,22 +2993,22 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode) int ret; changeset.bytes_changed = 0; - changeset.range_changed = ulist_alloc(GFP_NOFS); - if (WARN_ON(!changeset.range_changed)) - return; - + ulist_init(&changeset.range_changed); ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, EXTENT_QGROUP_RESERVED, &changeset); WARN_ON(ret < 0); if (WARN_ON(changeset.bytes_changed)) { ULIST_ITER_INIT(&iter); - while ((unode = ulist_next(changeset.range_changed, &iter))) { + while ((unode = ulist_next(&changeset.range_changed, &iter))) { btrfs_warn(BTRFS_I(inode)->root->fs_info, "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", inode->i_ino, unode->val, unode->aux); } - qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); + btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, + BTRFS_I(inode)->root->objectid, + changeset.bytes_changed); + } - ulist_free(changeset.range_changed); + ulist_release(&changeset.range_changed); } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 416ae8e1d23c..26932a8a1993 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); /* * Inform qgroup to trace one dirty extent, its info is recorded in @record. - * So qgroup can account it at commit trans time. + * So qgroup can account it at transaction committing time. * - * No lock version, caller must acquire delayed ref lock and allocate memory. + * No lock version, caller must acquire delayed ref lock and allocated memory, + * then call btrfs_qgroup_trace_extent_post() after exiting lock context. * * Return 0 for success insert * Return >0 for existing record, caller can free @record safely. @@ -108,11 +109,37 @@ int btrfs_qgroup_trace_extent_nolock( struct btrfs_qgroup_extent_record *record); /* + * Post handler after qgroup_trace_extent_nolock(). + * + * NOTE: Current qgroup does the expensive backref walk at transaction + * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming + * new transaction. + * This is designed to allow btrfs_find_all_roots() to get correct new_roots + * result. + * + * However for old_roots there is no need to do backref walk at that time, + * since we search commit roots to walk backref and result will always be + * correct. + * + * Due to the nature of no lock version, we can't do backref there. + * So we must call btrfs_qgroup_trace_extent_post() after exiting + * spinlock context. + * + * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result + * using current root, then we can move all expensive backref walk out of + * transaction committing, but not now as qgroup accounting will be wrong again. + */ +int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_extent_record *qrecord); + +/* * Inform qgroup to trace one dirty extent, specified by @bytenr and * @num_bytes. * So qgroup can account it at commit trans time. * - * Better encapsulated version. + * Better encapsulated version, with memory allocation and backref walk for + * commit roots. + * So this can sleep. * * Return 0 if the operation is done. * Return <0 for error, like memory allocation failure or invalid parameter @@ -181,7 +208,8 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len); int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len); int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len); -int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes); +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + bool enforce); void btrfs_qgroup_free_meta_all(struct btrfs_root *root); void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes); void btrfs_qgroup_check_reserved_leak(struct inode *inode); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d2a9a1ee5361..1571bf26dc07 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -677,11 +677,9 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; - int walk = 0; spin_lock_irqsave(&h->lock, flags); list_for_each_entry(cur, &h->hash_list, hash_list) { - walk++; if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) { spin_lock(&cur->bio_list_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 379711048fb0..ddbde0f08365 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1548,9 +1548,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < btrfs_ino(&entry->vfs_inode)) + if (objectid < btrfs_ino(entry)) node = node->rb_left; - else if (objectid > btrfs_ino(&entry->vfs_inode)) + else if (objectid > btrfs_ino(entry)) node = node->rb_right; else break; @@ -1558,7 +1558,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= btrfs_ino(&entry->vfs_inode)) { + if (objectid <= btrfs_ino(entry)) { node = prev; break; } @@ -1573,7 +1573,7 @@ again: return inode; } - objectid = btrfs_ino(&entry->vfs_inode) + 1; + objectid = btrfs_ino(entry) + 1; if (cond_resched_lock(&root->inode_lock)) goto again; @@ -1609,8 +1609,8 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, return -ENOMEM; bytenr -= BTRFS_I(reloc_inode)->index_cnt; - ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), - bytenr, 0); + ret = btrfs_lookup_file_extent(NULL, root, path, + btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0); if (ret < 0) goto out; if (ret > 0) { @@ -1698,11 +1698,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (first) { inode = find_next_inode(root, key.objectid); first = 0; - } else if (inode && btrfs_ino(inode) < key.objectid) { + } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) { btrfs_add_delayed_iput(inode); inode = find_next_inode(root, key.objectid); } - if (inode && btrfs_ino(inode) == key.objectid) { + if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) { end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); WARN_ON(!IS_ALIGNED(key.offset, @@ -2088,7 +2088,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, inode = find_next_inode(root, objectid); if (!inode) break; - ino = btrfs_ino(inode); + ino = btrfs_ino(BTRFS_I(inode)); if (ino > max_key->objectid) { iput(inode); @@ -3543,7 +3543,7 @@ truncate: goto out; } - ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode); + ret = btrfs_truncate_free_space_cache(trans, block_group, inode); btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); @@ -4334,7 +4334,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) rc->block_group = btrfs_lookup_block_group(fs_info, group_start); BUG_ON(!rc->block_group); - ret = btrfs_inc_block_group_ro(extent_root, rc->block_group); + ret = btrfs_inc_block_group_ro(fs_info, rc->block_group); if (ret) { err = ret; goto out; @@ -4347,8 +4347,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) goto out; } - inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, - path); + inode = lookup_free_space_inode(fs_info, rc->block_group, path); btrfs_free_path(path); if (!IS_ERR(inode)) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 4c6735491ee0..a08224eab8b4 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -74,7 +74,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot, * * If we find something return 0, otherwise > 0, < 0 on error. */ -int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, +int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key, struct btrfs_path *path, struct btrfs_root_item *root_item, struct btrfs_key *root_key) { @@ -207,7 +207,7 @@ out: } int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_key *key, struct btrfs_root_item *item) + const struct btrfs_key *key, struct btrfs_root_item *item) { /* * Make sure generation v1 and v2 match. See update_root for details. @@ -337,7 +337,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) /* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_key *key) + const struct btrfs_key *key) { struct btrfs_path *path; int ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9a94670536a6..ff9a11c39f5e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -282,9 +282,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, u64 *extent_physical, struct btrfs_device **extent_dev, int *extent_mirror_num); -static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, - struct scrub_wr_ctx *wr_ctx, - struct btrfs_fs_info *fs_info, +static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, struct btrfs_device *dev, int is_dev_replace); static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); @@ -501,7 +499,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) spin_lock_init(&sctx->stat_lock); init_waitqueue_head(&sctx->list_wait); - ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info, + ret = scrub_setup_wr_ctx(&sctx->wr_ctx, fs_info->dev_replace.tgtdev, is_dev_replace); if (ret) { scrub_free_ctx(sctx); @@ -3584,7 +3582,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * -> btrfs_scrub_pause() */ scrub_pause_on(fs_info); - ret = btrfs_inc_block_group_ro(root, cache); + ret = btrfs_inc_block_group_ro(fs_info, cache); if (!ret && is_dev_replace) { /* * If we are doing a device replace wait for any tasks @@ -4084,9 +4082,7 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, btrfs_put_bbio(bbio); } -static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, - struct scrub_wr_ctx *wr_ctx, - struct btrfs_fs_info *fs_info, +static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, struct btrfs_device *dev, int is_dev_replace) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b5ae7d3d1896..da687dc79cce 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -265,7 +265,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, function, line, errstr); return; } - ACCESS_ONCE(trans->transaction->aborted) = errno; + WRITE_ONCE(trans->transaction->aborted, errno); /* Wake up anybody who may be waiting on this transaction */ wake_up(&fs_info->transaction_wait); wake_up(&fs_info->transaction_blocked_wait); @@ -1114,7 +1114,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices, - void *data, int silent) + void *data) { struct inode *inode; struct btrfs_fs_info *fs_info = btrfs_sb(sb); @@ -1611,8 +1611,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); btrfs_sb(s)->bdev_holder = fs_type; - error = btrfs_fill_super(s, fs_devices, data, - flags & MS_SILENT ? 1 : 0); + error = btrfs_fill_super(s, fs_devices, data); } if (error) { deactivate_locked_super(s); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0e0508f488b2..6b3e0fc2fe7a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -474,7 +474,8 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root) static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, unsigned int num_items, - unsigned int type, enum btrfs_reserve_flush_enum flush) + unsigned int type, enum btrfs_reserve_flush_enum flush, + bool enforce_qgroups) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -505,9 +506,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, * Do the reservation before we join the transaction so we can do all * the appropriate flushing if need be. */ - if (num_items > 0 && root != fs_info->chunk_root) { + if (num_items && root != fs_info->chunk_root) { qgroup_reserved = num_items * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved); + ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved, + enforce_qgroups); if (ret) return ERR_PTR(ret); @@ -613,8 +615,9 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, unsigned int num_items) { return start_transaction(root, num_items, TRANS_START, - BTRFS_RESERVE_FLUSH_ALL); + BTRFS_RESERVE_FLUSH_ALL, true); } + struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( struct btrfs_root *root, unsigned int num_items, @@ -625,7 +628,14 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( u64 num_bytes; int ret; - trans = btrfs_start_transaction(root, num_items); + /* + * We have two callers: unlink and block group removal. The + * former should succeed even if we will temporarily exceed + * quota and the latter operates on the extent root so + * qgroup enforcement is ignored anyway. + */ + trans = start_transaction(root, num_items, TRANS_START, + BTRFS_RESERVE_FLUSH_ALL, false); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -654,25 +664,25 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush( unsigned int num_items) { return start_transaction(root, num_items, TRANS_START, - BTRFS_RESERVE_FLUSH_LIMIT); + BTRFS_RESERVE_FLUSH_LIMIT, true); } struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) { - return start_transaction(root, 0, TRANS_JOIN, - BTRFS_RESERVE_NO_FLUSH); + return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH, + true); } struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN_NOLOCK, - BTRFS_RESERVE_NO_FLUSH); + BTRFS_RESERVE_NO_FLUSH, true); } struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_USERSPACE, - BTRFS_RESERVE_NO_FLUSH); + BTRFS_RESERVE_NO_FLUSH, true); } /* @@ -691,7 +701,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_ATTACH, - BTRFS_RESERVE_NO_FLUSH); + BTRFS_RESERVE_NO_FLUSH, true); } /* @@ -707,7 +717,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) struct btrfs_trans_handle *trans; trans = start_transaction(root, 0, TRANS_ATTACH, - BTRFS_RESERVE_NO_FLUSH); + BTRFS_RESERVE_NO_FLUSH, true); if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) btrfs_wait_for_commit(root->fs_info, 0); @@ -866,14 +876,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (lock && !atomic_read(&info->open_ioctl_trans) && should_end_transaction(trans) && - ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { + READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { spin_lock(&info->trans_lock); if (cur_trans->state == TRANS_STATE_RUNNING) cur_trans->state = TRANS_STATE_BLOCKED; spin_unlock(&info->trans_lock); } - if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { + if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { if (throttle) return btrfs_commit_transaction(trans); else @@ -1354,12 +1364,8 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, * enabled. If this check races with the ioctl, rescan will * kick in anyway. */ - mutex_lock(&fs_info->qgroup_ioctl_lock); - if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { - mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) return 0; - } - mutex_unlock(&fs_info->qgroup_ioctl_lock); /* * We are going to commit transaction, see btrfs_commit_transaction() @@ -1504,7 +1510,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* check if there is a file/dir which has the same name. */ dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, - btrfs_ino(parent_inode), + btrfs_ino(BTRFS_I(parent_inode)), dentry->d_name.name, dentry->d_name.len, 0); if (dir_item != NULL && !IS_ERR(dir_item)) { @@ -1598,7 +1604,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, */ ret = btrfs_add_root_ref(trans, fs_info, objectid, parent_root->root_key.objectid, - btrfs_ino(parent_inode), index, + btrfs_ino(BTRFS_I(parent_inode)), index, dentry->d_name.name, dentry->d_name.len); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1940,7 +1946,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) int ret; /* Stop the commit early if ->aborted is set */ - if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { + if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; btrfs_end_transaction(trans); return ret; @@ -2080,7 +2086,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) atomic_read(&cur_trans->num_writers) == 1); /* ->aborted might be set after the previous check, so check it */ - if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { + if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; goto scrub_continue; } @@ -2194,14 +2200,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * The tasks which save the space cache and inode cache may also * update ->aborted, check it. */ - if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { + if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); goto scrub_continue; } - btrfs_prepare_extent_commit(trans, fs_info); + btrfs_prepare_extent_commit(fs_info); cur_trans = fs_info->running_transaction; @@ -2251,7 +2257,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) goto scrub_continue; } - ret = write_ctree_super(trans, fs_info, 0); + ret = write_all_supers(fs_info, 0); if (ret) { mutex_unlock(&fs_info->tree_log_mutex); goto scrub_continue; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index eeffff84f280..3806853cde08 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -97,7 +97,7 @@ #define LOG_WALK_REPLAY_ALL 3 static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, const loff_t start, const loff_t end, @@ -631,8 +631,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * file. This must be done before the btrfs_drop_extents run * so we don't try to drop this extent. */ - ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), - start, 0); + ret = btrfs_lookup_file_extent(trans, root, path, + btrfs_ino(BTRFS_I(inode)), start, 0); if (ret == 0 && (found_type == BTRFS_FILE_EXTENT_REG || @@ -843,7 +843,7 @@ out: static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct inode *dir, + struct btrfs_inode *dir, struct btrfs_dir_item *di) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -875,7 +875,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); + ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name, + name_len); if (ret) goto out; else @@ -991,8 +992,8 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_root *log_root, - struct inode *dir, struct inode *inode, - struct extent_buffer *eb, + struct btrfs_inode *dir, + struct btrfs_inode *inode, u64 inode_objectid, u64 parent_objectid, u64 ref_index, char *name, int namelen, int *search_done) @@ -1047,12 +1048,11 @@ again: parent_objectid, victim_name, victim_name_len)) { - inc_nlink(inode); + inc_nlink(&inode->vfs_inode); btrfs_release_path(path); - ret = btrfs_unlink_inode(trans, root, dir, - inode, victim_name, - victim_name_len); + ret = btrfs_unlink_inode(trans, root, dir, inode, + victim_name, victim_name_len); kfree(victim_name); if (ret) return ret; @@ -1115,16 +1115,16 @@ again: victim_name_len)) { ret = -ENOENT; victim_parent = read_one_inode(root, - parent_objectid); + parent_objectid); if (victim_parent) { - inc_nlink(inode); + inc_nlink(&inode->vfs_inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, - victim_parent, - inode, - victim_name, - victim_name_len); + BTRFS_I(victim_parent), + inode, + victim_name, + victim_name_len); if (!ret) ret = btrfs_run_delayed_items( trans, @@ -1295,8 +1295,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, goto out; /* if we already have a perfect match, we're done */ - if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), - ref_index, name, namelen)) { + if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), + btrfs_ino(BTRFS_I(inode)), ref_index, + name, namelen)) { /* * look for a conflicting back reference in the * metadata. if we find one we have to unlink that name @@ -1307,7 +1308,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (!search_done) { ret = __add_inode_ref(trans, root, path, log, - dir, inode, eb, + BTRFS_I(dir), + BTRFS_I(inode), inode_objectid, parent_objectid, ref_index, name, namelen, @@ -1360,7 +1362,7 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, } static int count_inode_extrefs(struct btrfs_root *root, - struct inode *inode, struct btrfs_path *path) + struct btrfs_inode *inode, struct btrfs_path *path) { int ret = 0; int name_len; @@ -1404,7 +1406,7 @@ static int count_inode_extrefs(struct btrfs_root *root, } static int count_inode_refs(struct btrfs_root *root, - struct inode *inode, struct btrfs_path *path) + struct btrfs_inode *inode, struct btrfs_path *path) { int ret; struct btrfs_key key; @@ -1477,19 +1479,19 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; u64 nlink = 0; - u64 ino = btrfs_ino(inode); + u64 ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = count_inode_refs(root, inode, path); + ret = count_inode_refs(root, BTRFS_I(inode), path); if (ret < 0) goto out; nlink = ret; - ret = count_inode_extrefs(root, inode, path); + ret = count_inode_extrefs(root, BTRFS_I(inode), path); if (ret < 0) goto out; @@ -1769,7 +1771,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (!exists) goto out; - ret = drop_one_dir_item(trans, root, path, dir, dst_di); + ret = drop_one_dir_item(trans, root, path, BTRFS_I(dir), dst_di); if (ret) goto out; @@ -2052,8 +2054,8 @@ again: } inc_nlink(inode); - ret = btrfs_unlink_inode(trans, root, dir, inode, - name, name_len); + ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), + BTRFS_I(inode), name, name_len); if (!ret) ret = btrfs_run_delayed_items(trans, fs_info); kfree(name); @@ -2469,7 +2471,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking(next); - clean_tree_block(trans, fs_info, next); + clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } @@ -2549,7 +2551,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking(next); - clean_tree_block(trans, fs_info, next); + clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } @@ -2627,7 +2629,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, if (trans) { btrfs_tree_lock(next); btrfs_set_lock_blocking(next); - clean_tree_block(trans, fs_info, next); + clean_tree_block(fs_info, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); } @@ -2958,7 +2960,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * the running transaction open, so a full commit can't hop * in and cause problems either. */ - ret = write_ctree_super(trans, fs_info, 1); + ret = write_all_supers(fs_info, 1); if (ret) { btrfs_set_log_full_commit(fs_info, trans); btrfs_abort_transaction(trans, ret); @@ -3084,7 +3086,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *dir, u64 index) + struct btrfs_inode *dir, u64 index) { struct btrfs_root *log; struct btrfs_dir_item *di; @@ -3094,14 +3096,14 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int bytes_del = 0; u64 dir_ino = btrfs_ino(dir); - if (BTRFS_I(dir)->logged_trans < trans->transid) + if (dir->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); if (ret) return 0; - mutex_lock(&BTRFS_I(dir)->log_mutex); + mutex_lock(&dir->log_mutex); log = root->log_root; path = btrfs_alloc_path(); @@ -3176,7 +3178,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, fail: btrfs_free_path(path); out_unlock: - mutex_unlock(&BTRFS_I(dir)->log_mutex); + mutex_unlock(&dir->log_mutex); if (ret == -ENOSPC) { btrfs_set_log_full_commit(root->fs_info, trans); ret = 0; @@ -3192,25 +3194,25 @@ out_unlock: int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *inode, u64 dirid) + struct btrfs_inode *inode, u64 dirid) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log; u64 index; int ret; - if (BTRFS_I(inode)->logged_trans < trans->transid) + if (inode->logged_trans < trans->transid) return 0; ret = join_running_log_trans(root); if (ret) return 0; log = root->log_root; - mutex_lock(&BTRFS_I(inode)->log_mutex); + mutex_lock(&inode->log_mutex); ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), dirid, &index); - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_unlock(&inode->log_mutex); if (ret == -ENOSPC) { btrfs_set_log_full_commit(fs_info, trans); ret = 0; @@ -3260,7 +3262,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, * to replay anything deleted before the fsync */ static noinline int log_dir_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, int key_type, struct btrfs_log_ctx *ctx, @@ -3450,7 +3452,7 @@ done: * key logged by this transaction. */ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, struct btrfs_log_ctx *ctx) @@ -3464,9 +3466,8 @@ again: min_key = 0; max_key = 0; while (1) { - ret = log_dir_items(trans, root, inode, path, - dst_path, key_type, ctx, min_key, - &max_key); + ret = log_dir_items(trans, root, inode, path, dst_path, key_type, + ctx, min_key, &max_key); if (ret) return ret; if (max_key == (u64)-1) @@ -3595,34 +3596,34 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, static int log_inode_item(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *path, - struct inode *inode) + struct btrfs_inode *inode) { struct btrfs_inode_item *inode_item; int ret; ret = btrfs_insert_empty_item(trans, log, path, - &BTRFS_I(inode)->location, - sizeof(*inode_item)); + &inode->location, sizeof(*inode_item)); if (ret && ret != -EEXIST) return ret; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0); + fill_inode_item(trans, path->nodes[0], inode_item, &inode->vfs_inode, + 0, 0); btrfs_release_path(path); return 0; } static noinline int copy_items(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *dst_path, struct btrfs_path *src_path, u64 *last_extent, int start_slot, int nr, int inode_only, u64 logged_isize) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); unsigned long src_offset; unsigned long dst_offset; - struct btrfs_root *log = BTRFS_I(inode)->root->log_root; + struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; @@ -3633,7 +3634,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, char *ins_data; int i; struct list_head ordered_sums; - int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; + int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; bool has_extents = false; bool need_find_last_extent = true; bool done = false; @@ -3675,7 +3676,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, dst_path->slots[0], struct btrfs_inode_item); fill_inode_item(trans, dst_path->nodes[0], inode_item, - inode, inode_only == LOG_INODE_EXISTS, + &inode->vfs_inode, + inode_only == LOG_INODE_EXISTS, logged_isize); } else { copy_extent_buffer(dst_path->nodes[0], src, dst_offset, @@ -3783,7 +3785,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (need_find_last_extent) { u64 len; - ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path); + ret = btrfs_prev_leaf(inode->root, src_path); if (ret < 0) return ret; if (ret) @@ -3825,8 +3827,8 @@ fill_holes: if (need_find_last_extent) { /* btrfs_prev_leaf could return 1 without releasing the path */ btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key, - src_path, 0, 0); + ret = btrfs_search_slot(NULL, inode->root, &first_key, + src_path, 0, 0); if (ret < 0) return ret; ASSERT(ret == 0); @@ -3846,7 +3848,7 @@ fill_holes: u64 extent_end; if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path); + ret = btrfs_next_leaf(inode->root, src_path); if (ret < 0) return ret; ASSERT(ret == 0); @@ -3881,8 +3883,7 @@ fill_holes: offset = *last_extent; len = key.offset - *last_extent; ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, - 0, 0); + offset, 0, 0, len, 0, len, 0, 0, 0); if (ret) break; *last_extent = extent_end; @@ -4055,7 +4056,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, } static int log_one_extent(struct btrfs_trans_handle *trans, - struct inode *inode, struct btrfs_root *root, + struct btrfs_inode *inode, struct btrfs_root *root, const struct extent_map *em, struct btrfs_path *path, const struct list_head *logged_list, @@ -4072,8 +4073,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int extent_inserted = 0; bool ordered_io_err = false; - ret = wait_ordered_extents(trans, inode, root, em, logged_list, - &ordered_io_err); + ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, + logged_list, &ordered_io_err); if (ret) return ret; @@ -4084,7 +4085,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_init_map_token(&token); - ret = __btrfs_drop_extents(trans, log, inode, path, em->start, + ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, em->start + em->len, NULL, 0, 1, sizeof(*fi), &extent_inserted); if (ret) @@ -4150,7 +4151,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path, struct list_head *logged_list, struct btrfs_log_ctx *ctx, @@ -4159,14 +4160,14 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, { struct extent_map *em, *n; struct list_head extents; - struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *tree = &inode->extent_tree; u64 test_gen; int ret = 0; int num = 0; INIT_LIST_HEAD(&extents); - down_write(&BTRFS_I(inode)->dio_sem); + down_write(&inode->dio_sem); write_lock(&tree->lock); test_gen = root->fs_info->last_trans_committed; @@ -4206,7 +4207,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, * without writing to the log tree and the fsync must report the * file data write error and not commit the current transaction. */ - ret = filemap_check_errors(inode->i_mapping); + ret = filemap_check_errors(inode->vfs_inode.i_mapping); if (ret) ctx->io_err = ret; process: @@ -4235,13 +4236,13 @@ process: } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); - up_write(&BTRFS_I(inode)->dio_sem); + up_write(&inode->dio_sem); btrfs_release_path(path); return ret; } -static int logged_inode_size(struct btrfs_root *log, struct inode *inode, +static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, struct btrfs_path *path, u64 *size_ret) { struct btrfs_key key; @@ -4279,7 +4280,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, */ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path) { @@ -4374,7 +4375,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, */ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4385,7 +4386,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); - const u64 i_size = i_size_read(inode); + const u64 i_size = i_size_read(&inode->vfs_inode); if (!btrfs_fs_incompat(fs_info, NO_HOLES)) return 0; @@ -4495,7 +4496,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, static int btrfs_check_ref_name_override(struct extent_buffer *eb, const int slot, const struct btrfs_key *key, - struct inode *inode, + struct btrfs_inode *inode, u64 *other_ino) { int ret; @@ -4551,9 +4552,8 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, } read_extent_buffer(eb, name, name_ptr, this_name_len); - di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root, - search_path, parent, - name, this_name_len, 0); + di = btrfs_lookup_dir_item(NULL, inode->root, search_path, + parent, name, this_name_len, 0); if (di && !IS_ERR(di)) { struct btrfs_key di_key; @@ -4596,7 +4596,7 @@ out: * This handles both files and directories. */ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, const loff_t start, const loff_t end, @@ -4618,7 +4618,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int ins_nr; bool fast_search = false; u64 ino = btrfs_ino(inode); - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; u64 logged_isize = 0; bool need_log_inode_item = true; @@ -4639,9 +4639,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, /* today the code can only do partial logging of directories */ - if (S_ISDIR(inode->i_mode) || + if (S_ISDIR(inode->vfs_inode.i_mode) || (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags) && + &inode->runtime_flags) && inode_only >= LOG_INODE_EXISTS)) max_key.type = BTRFS_XATTR_ITEM_KEY; else @@ -4654,8 +4654,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * order for the log replay code to mark inodes for link count * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items). */ - if (S_ISDIR(inode->i_mode) || - BTRFS_I(inode)->generation > fs_info->last_trans_committed) + if (S_ISDIR(inode->vfs_inode.i_mode) || + inode->generation > fs_info->last_trans_committed) ret = btrfs_commit_inode_delayed_items(trans, inode); else ret = btrfs_commit_inode_delayed_inode(inode); @@ -4668,17 +4668,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (inode_only == LOG_OTHER_INODE) { inode_only = LOG_INODE_EXISTS; - mutex_lock_nested(&BTRFS_I(inode)->log_mutex, - SINGLE_DEPTH_NESTING); + mutex_lock_nested(&inode->log_mutex, SINGLE_DEPTH_NESTING); } else { - mutex_lock(&BTRFS_I(inode)->log_mutex); + mutex_lock(&inode->log_mutex); } /* * a brute force approach to making sure we get the most uptodate * copies of everything. */ - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->vfs_inode.i_mode)) { int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; if (inode_only == LOG_INODE_EXISTS) @@ -4699,31 +4698,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * (zeroes), as if an expanding truncate happened, * instead of getting a file of 4Kb only. */ - err = logged_inode_size(log, inode, path, - &logged_isize); + err = logged_inode_size(log, inode, path, &logged_isize); if (err) goto out_unlock; } if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags)) { + &inode->runtime_flags)) { if (inode_only == LOG_INODE_EXISTS) { max_key.type = BTRFS_XATTR_ITEM_KEY; ret = drop_objectid_items(trans, log, path, ino, max_key.type); } else { clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); while(1) { ret = btrfs_truncate_inode_items(trans, - log, inode, 0, 0); + log, &inode->vfs_inode, 0, 0); if (ret != -EAGAIN) break; } } } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags) || + &inode->runtime_flags) || inode_only == LOG_INODE_EXISTS) { if (inode_only == LOG_INODE_ALL) fast_search = true; @@ -4764,18 +4762,17 @@ again: if ((min_key.type == BTRFS_INODE_REF_KEY || min_key.type == BTRFS_INODE_EXTREF_KEY) && - BTRFS_I(inode)->generation == trans->transid) { + inode->generation == trans->transid) { u64 other_ino = 0; ret = btrfs_check_ref_name_override(path->nodes[0], - path->slots[0], - &min_key, inode, - &other_ino); + path->slots[0], &min_key, inode, + &other_ino); if (ret < 0) { err = ret; goto out_unlock; } else if (ret > 0 && ctx && - other_ino != btrfs_ino(ctx->inode)) { + other_ino != btrfs_ino(BTRFS_I(ctx->inode))) { struct btrfs_key inode_key; struct inode *other_inode; @@ -4823,9 +4820,10 @@ again: * update the log with the new name before we * unpin it. */ - err = btrfs_log_inode(trans, root, other_inode, - LOG_OTHER_INODE, - 0, LLONG_MAX, ctx); + err = btrfs_log_inode(trans, root, + BTRFS_I(other_inode), + LOG_OTHER_INODE, 0, LLONG_MAX, + ctx); iput(other_inode); if (err) goto out_unlock; @@ -4979,25 +4977,25 @@ log_extents: write_unlock(&em_tree->lock); } - if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) { ret = log_directory_changes(trans, root, inode, path, dst_path, - ctx); + ctx); if (ret) { err = ret; goto out_unlock; } } - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->logged_trans = trans->transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + inode->logged_trans = trans->transid; + inode->last_log_commit = inode->last_sub_trans; + spin_unlock(&inode->lock); out_unlock: if (unlikely(err)) btrfs_put_logged_extents(&logged_list); else btrfs_submit_logged_extents(&logged_list, log); - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_unlock(&inode->log_mutex); btrfs_free_path(path); btrfs_free_path(dst_path); @@ -5021,13 +5019,13 @@ out_unlock: * we logged the inode or it might have also done the unlink). */ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; bool ret = false; - mutex_lock(&BTRFS_I(inode)->log_mutex); - if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { + mutex_lock(&inode->log_mutex); + if (inode->last_unlink_trans > fs_info->last_trans_committed) { /* * Make sure any commits to the log are forced to be full * commits. @@ -5035,7 +5033,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_log_full_commit(fs_info, trans); ret = true; } - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_unlock(&inode->log_mutex); return ret; } @@ -5084,7 +5082,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, BTRFS_I(inode)->logged_trans = trans->transid; smp_mb(); - if (btrfs_must_commit_transaction(trans, inode)) { + if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) { ret = 1; break; } @@ -5094,7 +5092,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) { inode = d_inode(parent); - if (btrfs_must_commit_transaction(trans, inode)) + if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) ret = 1; break; } @@ -5159,7 +5157,7 @@ struct btrfs_dir_list { */ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *start_inode, + struct btrfs_inode *start_inode, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -5237,7 +5235,7 @@ process_leaf: goto next_dir_inode; } - if (btrfs_inode_in_log(di_inode, trans->transid)) { + if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) { iput(di_inode); break; } @@ -5245,10 +5243,10 @@ process_leaf: ctx->log_new_dentries = false; if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) log_mode = LOG_INODE_ALL; - ret = btrfs_log_inode(trans, root, di_inode, + ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode), log_mode, 0, LLONG_MAX, ctx); if (!ret && - btrfs_must_commit_transaction(trans, di_inode)) + btrfs_must_commit_transaction(trans, BTRFS_I(di_inode))) ret = 1; iput(di_inode); if (ret) @@ -5297,7 +5295,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(inode)->root; - const u64 ino = btrfs_ino(inode); + const u64 ino = btrfs_ino(BTRFS_I(inode)); path = btrfs_alloc_path(); if (!path) @@ -5365,14 +5363,14 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, if (ctx) ctx->log_new_dentries = false; - ret = btrfs_log_inode(trans, root, dir_inode, + ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode), LOG_INODE_ALL, 0, LLONG_MAX, ctx); if (!ret && - btrfs_must_commit_transaction(trans, dir_inode)) + btrfs_must_commit_transaction(trans, BTRFS_I(dir_inode))) ret = 1; if (!ret && ctx && ctx->log_new_dentries) ret = log_new_dir_dentries(trans, root, - dir_inode, ctx); + BTRFS_I(dir_inode), ctx); iput(dir_inode); if (ret) goto out; @@ -5436,7 +5434,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - if (btrfs_inode_in_log(inode, trans->transid)) { + if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } @@ -5445,7 +5443,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); + ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only, + start, end, ctx); if (ret) goto end_trans; @@ -5521,7 +5520,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, break; if (BTRFS_I(inode)->generation > last_committed) { - ret = btrfs_log_inode(trans, root, inode, + ret = btrfs_log_inode(trans, root, BTRFS_I(inode), LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); if (ret) @@ -5535,7 +5534,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, old_parent = parent; } if (log_dentries) - ret = log_new_dir_dentries(trans, root, orig_inode, ctx); + ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx); else ret = 0; end_trans: @@ -5730,7 +5729,7 @@ error: * inodes, etc) are done. */ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, int for_rename) { /* @@ -5743,23 +5742,23 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * into the file. When the file is logged we check it and * don't log the parents if the file is fully on disk. */ - mutex_lock(&BTRFS_I(inode)->log_mutex); - BTRFS_I(inode)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(inode)->log_mutex); + mutex_lock(&inode->log_mutex); + inode->last_unlink_trans = trans->transid; + mutex_unlock(&inode->log_mutex); /* * if this directory was already logged any new * names for this file/dir will get recorded */ smp_mb(); - if (BTRFS_I(dir)->logged_trans == trans->transid) + if (dir->logged_trans == trans->transid) return; /* * if the inode we're about to unlink was logged, * the log will be properly updated for any new names */ - if (BTRFS_I(inode)->logged_trans == trans->transid) + if (inode->logged_trans == trans->transid) return; /* @@ -5776,9 +5775,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, return; record: - mutex_lock(&BTRFS_I(dir)->log_mutex); - BTRFS_I(dir)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(dir)->log_mutex); + mutex_lock(&dir->log_mutex); + dir->last_unlink_trans = trans->transid; + mutex_unlock(&dir->log_mutex); } /* @@ -5794,11 +5793,11 @@ record: * parent root and tree of tree roots trees, etc) are done. */ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, - struct inode *dir) + struct btrfs_inode *dir) { - mutex_lock(&BTRFS_I(dir)->log_mutex); - BTRFS_I(dir)->last_unlink_trans = trans->transid; - mutex_unlock(&BTRFS_I(dir)->log_mutex); + mutex_lock(&dir->log_mutex); + dir->last_unlink_trans = trans->transid; + mutex_unlock(&dir->log_mutex); } /* @@ -5809,30 +5808,28 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full transaction commit is required. */ int btrfs_log_new_name(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *old_dir, + struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root * root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; /* * this will force the logging code to walk the dentry chain * up for the file */ - if (S_ISREG(inode->i_mode)) - BTRFS_I(inode)->last_unlink_trans = trans->transid; + if (S_ISREG(inode->vfs_inode.i_mode)) + inode->last_unlink_trans = trans->transid; /* * if this inode hasn't been logged and directory we're renaming it * from hasn't been logged, we don't need to log it */ - if (BTRFS_I(inode)->logged_trans <= - fs_info->last_trans_committed && - (!old_dir || BTRFS_I(old_dir)->logged_trans <= - fs_info->last_trans_committed)) + if (inode->logged_trans <= fs_info->last_trans_committed && + (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 0, + return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0, LLONG_MAX, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index ab858e31ccbc..483027f9a7f4 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -48,13 +48,13 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans) { - ACCESS_ONCE(fs_info->last_trans_log_full_commit) = trans->transid; + WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid); } static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans) { - return ACCESS_ONCE(fs_info->last_trans_log_full_commit) == + return READ_ONCE(fs_info->last_trans_log_full_commit) == trans->transid; } @@ -72,19 +72,19 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *dir, u64 index); + struct btrfs_inode *dir, u64 index); int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *inode, u64 dirid); + struct btrfs_inode *inode, u64 dirid); void btrfs_end_log_trans(struct btrfs_root *root); int btrfs_pin_log_trans(struct btrfs_root *root); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, - struct inode *dir, struct inode *inode, + struct btrfs_inode *dir, struct btrfs_inode *inode, int for_rename); void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, - struct inode *dir); + struct btrfs_inode *dir); int btrfs_log_new_name(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *old_dir, + struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent); #endif diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index b1434bb57e36..d8edf164f81c 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -52,13 +52,13 @@ void ulist_init(struct ulist *ulist) } /** - * ulist_fini - free up additionally allocated memory for the ulist + * ulist_release - free up additionally allocated memory for the ulist * @ulist: the ulist from which to free the additional memory * * This is useful in cases where the base 'struct ulist' has been statically * allocated. */ -static void ulist_fini(struct ulist *ulist) +void ulist_release(struct ulist *ulist) { struct ulist_node *node; struct ulist_node *next; @@ -79,7 +79,7 @@ static void ulist_fini(struct ulist *ulist) */ void ulist_reinit(struct ulist *ulist) { - ulist_fini(ulist); + ulist_release(ulist); ulist_init(ulist); } @@ -105,13 +105,13 @@ struct ulist *ulist_alloc(gfp_t gfp_mask) * ulist_free - free dynamically allocated ulist * @ulist: ulist to free * - * It is not necessary to call ulist_fini before. + * It is not necessary to call ulist_release before. */ void ulist_free(struct ulist *ulist) { if (!ulist) return; - ulist_fini(ulist); + ulist_release(ulist); kfree(ulist); } diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index a01a2c45825f..53c913632733 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -19,9 +19,6 @@ * */ struct ulist_iterator { -#ifdef CONFIG_BTRFS_DEBUG - int i; -#endif struct list_head *cur_list; /* hint to start search */ }; @@ -32,10 +29,6 @@ struct ulist_node { u64 val; /* value to store */ u64 aux; /* auxiliary value saved along with the val */ -#ifdef CONFIG_BTRFS_DEBUG - int seqnum; /* sequence number this node is added */ -#endif - struct list_head list; /* used to link node */ struct rb_node rb_node; /* used to speed up search */ }; @@ -51,6 +44,7 @@ struct ulist { }; void ulist_init(struct ulist *ulist); +void ulist_release(struct ulist *ulist); void ulist_reinit(struct ulist *ulist); struct ulist *ulist_alloc(gfp_t gfp_mask); void ulist_free(struct ulist *ulist); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b2e70073a10d..13e55d13045d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -134,8 +134,7 @@ const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = { }; static int init_first_rw_device(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_device *device); + struct btrfs_fs_info *fs_info); static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); @@ -2440,7 +2439,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path) if (seeding_dev) { mutex_lock(&fs_info->chunk_mutex); - ret = init_first_rw_device(trans, fs_info, device); + ret = init_first_rw_device(trans, fs_info); mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_abort_transaction(trans, ret); @@ -4584,8 +4583,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) / sizeof(struct btrfs_stripe) + 1) static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 start, - u64 type) + u64 start, u64 type) { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_fs_devices *fs_devices = info->fs_devices; @@ -5009,12 +5007,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ASSERT(mutex_is_locked(&fs_info->chunk_mutex)); chunk_offset = find_next_chunk(fs_info); - return __btrfs_alloc_chunk(trans, fs_info, chunk_offset, type); + return __btrfs_alloc_chunk(trans, chunk_offset, type); } static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_device *device) + struct btrfs_fs_info *fs_info) { struct btrfs_root *extent_root = fs_info->extent_root; u64 chunk_offset; @@ -5024,14 +5021,13 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, chunk_offset = find_next_chunk(fs_info); alloc_profile = btrfs_get_alloc_profile(extent_root, 0); - ret = __btrfs_alloc_chunk(trans, fs_info, chunk_offset, alloc_profile); + ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile); if (ret) return ret; sys_chunk_offset = find_next_chunk(fs_info); alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); - ret = __btrfs_alloc_chunk(trans, fs_info, sys_chunk_offset, - alloc_profile); + ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile); return ret; } diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 9621c7f2503e..b3cbf80c5acf 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -47,8 +47,8 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, return -ENOMEM; /* lookup the xattr by name */ - di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name, - strlen(name), 0); + di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)), + name, strlen(name), 0); if (!di) { ret = -ENODATA; goto out; @@ -108,8 +108,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, path->skip_release_on_error = 1; if (!value) { - di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), - name, name_len, -1); + di = btrfs_lookup_xattr(trans, root, path, + btrfs_ino(BTRFS_I(inode)), name, name_len, -1); if (!di && (flags & XATTR_REPLACE)) ret = -ENODATA; else if (IS_ERR(di)) @@ -128,8 +128,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, */ if (flags & XATTR_REPLACE) { ASSERT(inode_is_locked(inode)); - di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), - name, name_len, 0); + di = btrfs_lookup_xattr(NULL, root, path, + btrfs_ino(BTRFS_I(inode)), name, name_len, 0); if (!di) ret = -ENODATA; else if (IS_ERR(di)) @@ -140,7 +140,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, di = NULL; } - ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), + ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(BTRFS_I(inode)), name, name_len, value, size); if (ret == -EOVERFLOW) { /* @@ -278,7 +278,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) * NOTE: we set key.offset = 0; because we want to start with the * first xattr that we find and walk forward */ - key.objectid = btrfs_ino(inode); + key.objectid = btrfs_ino(BTRFS_I(inode)); key.type = BTRFS_XATTR_ITEM_KEY; key.offset = 0; diff --git a/fs/buffer.c b/fs/buffer.c index 0e87401cf335..28484b3ebc98 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2395,7 +2395,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + unsigned int blocksize = i_blocksize(inode); struct page *page; void *fsdata; pgoff_t index, curidx; @@ -2475,8 +2475,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping, get_block_t *get_block, loff_t *bytes) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; - unsigned zerofrom; + unsigned int blocksize = i_blocksize(inode); + unsigned int zerofrom; int err; err = cont_expand_zero(file, mapping, pos, bytes); @@ -2838,7 +2838,7 @@ int nobh_truncate_page(struct address_space *mapping, struct buffer_head map_bh; int err; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); length = offset & (blocksize - 1); /* Block boundary? Nothing to do */ @@ -2916,7 +2916,7 @@ int block_truncate_page(struct address_space *mapping, struct buffer_head *bh; int err; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); length = offset & (blocksize - 1); /* Block boundary? Nothing to do */ @@ -3028,7 +3028,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, struct inode *inode = mapping->host; tmp.b_state = 0; tmp.b_blocknr = 0; - tmp.b_size = 1 << inode->i_blkbits; + tmp.b_size = i_blocksize(inode); get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e4b066cd912a..f297a9e18642 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) nr_pages = i; if (nr_pages > 0) { len = nr_pages << PAGE_SHIFT; + osd_req_op_extent_update(req, 0, len); break; } goto out_pages; @@ -751,7 +752,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct pagevec pvec; int done = 0; int rc = 0; - unsigned wsize = 1 << inode->i_blkbits; + unsigned int wsize = i_blocksize(inode); struct ceph_osd_request *req = NULL; int do_sync = 0; loff_t snap_size, i_size; @@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (ci->i_wrbuffer_ref > 0) { pr_warn_ratelimited( "writepage_start %p %lld forced umount\n", @@ -1017,8 +1018,7 @@ new_request: &ci->i_layout, vino, offset, &len, 0, num_ops, CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, false); if (IS_ERR(req)) { @@ -1028,8 +1028,7 @@ new_request: min(num_ops, CEPH_OSD_SLAB_OPS), CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, true); BUG_ON(IS_ERR(req)); @@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file, int r; struct ceph_snap_context *snapc, *oldest; - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout(" page %p forced umount\n", page); unlock_page(page); return -EIO; @@ -1386,8 +1385,9 @@ static void ceph_restore_sigs(sigset_t *oldset) /* * vm ops */ -static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ceph_filemap_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; @@ -1416,7 +1416,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) { current->journal_info = vma->vm_file; - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); current->journal_info = NULL; } else ret = -EAGAIN; @@ -1477,8 +1477,9 @@ out_restore: /* * Reuse write_begin here for simplicity. */ -static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ceph_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; @@ -1679,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, - CEPH_OSD_OP_CREATE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE, NULL, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1697,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -1871,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out_unlock; } - wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; + wr_req->r_flags = CEPH_OSD_FLAG_WRITE; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 5bc5d37b1217..4e7421caf380 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable, inode); if (fscache_cookie_enabled(ci->fscache)) { - dout("fscache_file_set_cookie %p %p enabing cache\n", + dout("fscache_file_set_cookie %p %p enabling cache\n", inode, filp); } } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 94fd76d04683..cd966f276a8d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) /* * Return caps we have registered with the MDS(s) as 'wanted'. */ -int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) +int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) { struct ceph_cap *cap; struct rb_node *p; @@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); - if (!__cap_is_valid(cap)) + if (check && !__cap_is_valid(cap)) continue; if (cap == ci->i_auth_cap) mds_wanted |= cap->mds_wanted; @@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, delayed = 1; } ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH); + if (want & ~cap->mds_wanted) { + /* user space may open/close single file frequently. + * This avoids droping mds_wanted immediately after + * requesting new mds_wanted. + */ + __cap_set_timeouts(mdsc, ci); + } cap->issued &= retain; /* drop bits we don't want */ if (cap->implemented & ~cap->issued) { @@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); - ceph_sync_write_wait(inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) goto out; @@ -2477,23 +2482,22 @@ again: if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) { int mds_wanted; - if (ACCESS_ONCE(mdsc->fsc->mount_state) == + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("get_cap_refs %p forced umount\n", inode); *err = -EIO; ret = 1; goto out_unlock; } - mds_wanted = __ceph_caps_mds_wanted(ci); - if ((mds_wanted & need) != need) { + mds_wanted = __ceph_caps_mds_wanted(ci, false); + if (need & ~(mds_wanted & need)) { dout("get_cap_refs %p caps were dropped" " (session killed?)\n", inode); *err = -ESTALE; ret = 1; goto out_unlock; } - if ((mds_wanted & file_wanted) == - (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR))) + if (!(file_wanted & ~mds_wanted)) ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED; } @@ -3404,6 +3408,7 @@ retry: tcap->implemented |= issued; if (cap == ci->i_auth_cap) ci->i_auth_cap = tcap; + if (!list_empty(&ci->i_cap_flush_list) && ci->i_auth_cap == tcap) { spin_lock(&mdsc->cap_dirty_lock); @@ -3417,9 +3422,18 @@ retry: } else if (tsession) { /* add placeholder for the export tagert */ int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; + tcap = new_cap; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + if (!list_empty(&ci->i_cap_flush_list) && + ci->i_auth_cap == tcap) { + spin_lock(&mdsc->cap_dirty_lock); + list_move_tail(&ci->i_flushing_item, + &tcap->session->s_cap_flushing); + spin_unlock(&mdsc->cap_dirty_lock); + } + __ceph_remove_cap(cap, false); goto out_unlock; } @@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode, } int ceph_encode_dentry_release(void **p, struct dentry *dentry, + struct inode *dir, int mds, int drop, int unless) { - struct inode *dir = d_inode(dentry->d_parent); + struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); int force = 0; @@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; + if (!dir) { + parent = dget(dentry->d_parent); + dir = d_inode(parent); + } spin_unlock(&dentry->d_lock); ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); + dput(parent); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 39ff678e567f..f2ae393e2c31 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p) seq_printf(s, "%s", ceph_mds_op_name(req->r_op)); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) seq_puts(s, "\t(unsafe)"); else seq_puts(s, "\t"); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8ab1fdf0bd49..3e9ad501addf 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -371,7 +371,7 @@ more: /* hints to request -> mds selection code */ req->r_direct_mode = USE_AUTH_MDS; req->r_direct_hash = ceph_frag_value(frag); - req->r_direct_is_hash = true; + __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); if (fi->last_name) { req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); if (!req->r_path2) { @@ -417,7 +417,7 @@ more: fi->frag = frag; fi->last_readdir = req; - if (req->r_did_prepopulate) { + if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { fi->readdir_cache_idx = req->r_readdir_cache_idx; if (fi->readdir_cache_idx < 0) { /* preclude from marking dir ordered */ @@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, mask |= CEPH_CAP_XATTR_SHARED; req->r_args.getattr.mask = cpu_to_le32(mask); - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_handle_snapdir(req, dentry, err); dentry = ceph_finish_lookup(req, dentry, err); @@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, } req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; @@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, ceph_mdsc_put_request(req); goto out; } - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; @@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; @@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ @@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) } req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_inode_drop = drop_caps_for_unlink(inode); @@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); req->r_old_dentry_dir = old_dir; - req->r_locked_dir = new_dir; + req->r_parent = new_dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; @@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) struct inode *dir; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; @@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; op = ceph_snap(dir) == CEPH_SNAPDIR ? - CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR; + CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (!IS_ERR(req)) { req->r_dentry = dget(dentry); - req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2; + req->r_num_caps = 2; + req->r_parent = dir; mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 180bbef760f2..e8f11fa565c5 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name, req->r_inode = d_inode(child); ihold(d_inode(child)); req->r_ino2 = ceph_vino(d_inode(parent)); - req->r_locked_dir = d_inode(parent); + req->r_parent = d_inode(parent); + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 045d30d26624..26cc95421cca 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_lock(&ci->i_ceph_lock); if (__ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { - int mds_wanted = __ceph_caps_mds_wanted(ci); + int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); dout("open %p fmode %d want %s issued %s using existing\n", @@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); - req->r_locked_dir = dir; /* caller holds dir->i_mutex */ + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); @@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work) goto out; } - req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE; + req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc); ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid); @@ -794,89 +793,6 @@ out: kfree(aio_work); } -/* - * Write commit request unsafe callback, called to tell us when a - * request is unsafe (that is, in flight--has been handed to the - * messenger to send to its target osd). It is called again when - * we've received a response message indicating the request is - * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request - * is completed early (and unsuccessfully) due to a timeout or - * interrupt. - * - * This is used if we requested both an ACK and ONDISK commit reply - * from the OSD. - */ -static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) -{ - struct ceph_inode_info *ci = ceph_inode(req->r_inode); - - dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid, - unsafe ? "un" : ""); - if (unsafe) { - ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); - spin_lock(&ci->i_unsafe_lock); - list_add_tail(&req->r_unsafe_item, - &ci->i_unsafe_writes); - spin_unlock(&ci->i_unsafe_lock); - - complete_all(&req->r_completion); - } else { - spin_lock(&ci->i_unsafe_lock); - list_del_init(&req->r_unsafe_item); - spin_unlock(&ci->i_unsafe_lock); - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); - } -} - -/* - * Wait on any unsafe replies for the given inode. First wait on the - * newest request, and make that the upper bound. Then, if there are - * more requests, keep waiting on the oldest as long as it is still older - * than the original request. - */ -void ceph_sync_write_wait(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_writes; - struct ceph_osd_request *req; - u64 last_tid; - - if (!S_ISREG(inode->i_mode)) - return; - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - /* set upper bound as _last_ entry in chain */ - - req = list_last_entry(head, struct ceph_osd_request, - r_unsafe_item); - last_tid = req->r_tid; - - do { - ceph_osdc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("sync_write_wait on tid %llu (until %llu)\n", - req->r_tid, last_tid); - wait_for_completion(&req->r_done_completion); - ceph_osdc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - /* - * from here on look at first entry in chain, since we - * only want to wait for anything older than last_tid - */ - if (list_empty(head)) - break; - req = list_first_entry(head, struct ceph_osd_request, - r_unsafe_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); -} - static ssize_t ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_snap_context *snapc, @@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (ret2 < 0) dout("invalidate_inode_pages2_range returned %d\n", ret2); - flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE; + flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; } else { flags = CEPH_OSD_FLAG_READ; } @@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ret < 0) dout("invalidate_inode_pages2_range returned %d\n", ret); - flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ACK; + flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; while ((len = iov_iter_count(from)) > 0) { size_t left; @@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, goto out; } - /* get a second commit callback */ - req->r_unsafe_callback = ceph_sync_write_unsafe; req->r_inode = inode; osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, @@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode, ceph_vino(inode), offset, length, 0, 1, op, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, NULL, 0, 0, false); if (IS_ERR(req)) { ret = PTR_ERR(req); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5e659d054b40..fd8f771f99b7 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; - INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); @@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode) return 1; } -void ceph_evict_inode(struct inode *inode) -{ - /* wait unsafe sync writes */ - ceph_sync_write_wait(inode); - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); -} - static inline blkcnt_t calc_inode_blocks(u64 size) { return (size + (1<<9) - 1) >> 9; @@ -1016,7 +1007,9 @@ out: static void update_dentry_lease(struct dentry *dentry, struct ceph_mds_reply_lease *lease, struct ceph_mds_session *session, - unsigned long from_time) + unsigned long from_time, + struct ceph_vino *tgt_vino, + struct ceph_vino *dir_vino) { struct ceph_dentry_info *di = ceph_dentry(dentry); long unsigned duration = le32_to_cpu(lease->duration_ms); @@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry, long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; struct inode *dir; + /* + * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that + * we expect a negative dentry. + */ + if (!tgt_vino && d_really_is_positive(dentry)) + return; + + if (tgt_vino && (d_really_is_negative(dentry) || + !ceph_ino_compare(d_inode(dentry), tgt_vino))) + return; + spin_lock(&dentry->d_lock); dout("update_dentry_lease %p duration %lu ms ttl %lu\n", dentry, duration, ttl); - /* make lease_rdcache_gen match directory */ dir = d_inode(dentry->d_parent); + /* make sure parent matches dir_vino */ + if (!ceph_ino_compare(dir, dir_vino)) + goto out_unlock; + /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) goto out_unlock; @@ -1108,61 +1115,27 @@ out: * * Called with snap_rwsem (read). */ -int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, - struct ceph_mds_session *session) +int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) { + struct ceph_mds_session *session = req->r_session; struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; - struct ceph_vino vino; + struct ceph_vino tvino, dvino; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); int err = 0; dout("fill_trace %p is_dentry %d is_target %d\n", req, rinfo->head->is_dentry, rinfo->head->is_target); -#if 0 - /* - * Debugging hook: - * - * If we resend completed ops to a recovering mds, we get no - * trace. Since that is very rare, pretend this is the case - * to ensure the 'no trace' handlers in the callers behave. - * - * Fill in inodes unconditionally to avoid breaking cap - * invariants. - */ - if (rinfo->head->op & CEPH_MDS_OP_WRITE) { - pr_info("fill_trace faking empty trace on %lld %s\n", - req->r_tid, ceph_mds_op_name(rinfo->head->op)); - if (rinfo->head->is_dentry) { - rinfo->head->is_dentry = 0; - err = fill_inode(req->r_locked_dir, - &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1); - } - if (rinfo->head->is_target) { - rinfo->head->is_target = 0; - ininfo = rinfo->targeti.in; - vino.ino = le64_to_cpu(ininfo->ino); - vino.snap = le64_to_cpu(ininfo->snapid); - in = ceph_get_inode(sb, vino); - err = fill_inode(in, &rinfo->targeti, NULL, - session, req->r_request_started, - req->r_fmode); - iput(in); - } - } -#endif - if (!rinfo->head->is_target && !rinfo->head->is_dentry) { dout("fill_trace reply is empty!\n"); - if (rinfo->head->result == 0 && req->r_locked_dir) + if (rinfo->head->result == 0 && req->r_parent) ceph_invalidate_dir_request(req); return 0; } if (rinfo->head->is_dentry) { - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; if (dir) { err = fill_inode(dir, NULL, @@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, dname.name = rinfo->dname; dname.len = rinfo->dname_len; dname.hash = full_name_hash(parent, dname.name, dname.len); - vino.ino = le64_to_cpu(rinfo->targeti.in->ino); - vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); retry_lookup: dn = d_lookup(parent, &dname); dout("d_lookup on parent=%p name=%.*s got %p\n", @@ -1206,8 +1179,8 @@ retry_lookup: } err = 0; } else if (d_really_is_positive(dn) && - (ceph_ino(d_inode(dn)) != vino.ino || - ceph_snap(d_inode(dn)) != vino.snap)) { + (ceph_ino(d_inode(dn)) != tvino.ino || + ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); d_delete(dn); @@ -1221,10 +1194,10 @@ retry_lookup: } if (rinfo->head->is_target) { - vino.ino = le64_to_cpu(rinfo->targeti.in->ino); - vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - in = ceph_get_inode(sb, vino); + in = ceph_get_inode(sb, tvino); if (IS_ERR(in)) { err = PTR_ERR(in); goto done; @@ -1233,8 +1206,8 @@ retry_lookup: err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, req->r_request_started, - (!req->r_aborted && rinfo->head->result == 0) ? - req->r_fmode : -1, + (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && + rinfo->head->result == 0) ? req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { pr_err("fill_inode badness %p %llx.%llx\n", @@ -1247,8 +1220,9 @@ retry_lookup: * ignore null lease/binding on snapdir ENOENT, or else we * will have trouble splicing in the virtual snapdir later */ - if (rinfo->head->is_dentry && !req->r_aborted && - req->r_locked_dir && + if (rinfo->head->is_dentry && + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { @@ -1257,17 +1231,19 @@ retry_lookup: * mknod symlink mkdir : null -> new inode * unlink : linked -> null */ - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; struct dentry *dn = req->r_dentry; bool have_dir_cap, have_lease; BUG_ON(!dn); BUG_ON(!dir); BUG_ON(d_inode(dn->d_parent) != dir); - BUG_ON(ceph_ino(dir) != - le64_to_cpu(rinfo->diri.in->ino)); - BUG_ON(ceph_snap(dir) != - le64_to_cpu(rinfo->diri.in->snapid)); + + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + BUG_ON(ceph_ino(dir) != dvino.ino); + BUG_ON(ceph_snap(dir) != dvino.snap); /* do we have a lease on the whole dir? */ have_dir_cap = @@ -1319,12 +1295,13 @@ retry_lookup: ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); - } else { - if (have_lease && d_unhashed(dn)) + } else if (have_lease) { + if (d_unhashed(dn)) d_add(dn, NULL); update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started); + req->r_request_started, + NULL, &dvino); } goto done; } @@ -1347,15 +1324,19 @@ retry_lookup: have_lease = false; } - if (have_lease) + if (have_lease) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started); + req->r_request_started, + &tvino, &dvino); + } dout(" final dn %p\n", dn); - } else if (!req->r_aborted && - (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || - req->r_op == CEPH_MDS_OP_MKSNAP)) { + } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || + req->r_op == CEPH_MDS_OP_MKSNAP) && + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct dentry *dn = req->r_dentry; - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; /* fill out a snapdir LOOKUPSNAP dentry */ BUG_ON(!dn); @@ -1370,6 +1351,26 @@ retry_lookup: goto done; } req->r_dentry = dn; /* may have spliced */ + } else if (rinfo->head->is_dentry) { + struct ceph_vino *ptvino = NULL; + + if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || + le32_to_cpu(rinfo->dlease->duration_ms)) { + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + if (rinfo->head->is_target) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + ptvino = &tvino; + } + + update_dentry_lease(req->r_dentry, rinfo->dlease, + session, req->r_request_started, ptvino, + &dvino); + } else { + dout("%s: no dentry lease or dir cap\n", __func__); + } } done: dout("fill_trace done err=%d\n", err); @@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, u32 fpos_offset; struct ceph_readdir_cache_control cache_ctl = {}; - if (req->r_aborted) + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) return readdir_prepopulate_inodes_only(req, session); if (rinfo->hash_order && req->r_path2) { @@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_vino vino; + struct ceph_vino tvino, dvino; dname.name = rde->name; dname.len = rde->name_len; dname.hash = full_name_hash(parent, dname.name, dname.len); - vino.ino = le64_to_cpu(rde->inode.in->ino); - vino.snap = le64_to_cpu(rde->inode.in->snapid); + tvino.ino = le64_to_cpu(rde->inode.in->ino); + tvino.snap = le64_to_cpu(rde->inode.in->snapid); if (rinfo->hash_order) { u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, @@ -1559,8 +1560,8 @@ retry_lookup: goto out; } } else if (d_really_is_positive(dn) && - (ceph_ino(d_inode(dn)) != vino.ino || - ceph_snap(d_inode(dn)) != vino.snap)) { + (ceph_ino(d_inode(dn)) != tvino.ino || + ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); d_delete(dn); @@ -1572,7 +1573,7 @@ retry_lookup: if (d_really_is_positive(dn)) { in = d_inode(dn); } else { - in = ceph_get_inode(parent->d_sb, vino); + in = ceph_get_inode(parent->d_sb, tvino); if (IS_ERR(in)) { dout("new_inode badness\n"); d_drop(dn); @@ -1617,8 +1618,9 @@ retry_lookup: ceph_dentry(dn)->offset = rde->offset; + dvino = ceph_vino(d_inode(parent)); update_dentry_lease(dn, rde->lease, req->r_session, - req->r_request_started); + req->r_request_started, &tvino, &dvino); if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { ret = fill_readdir_cache(d_inode(parent), dn, @@ -1632,7 +1634,7 @@ next_item: } out: if (err == 0 && skipped == 0) { - req->r_did_prepopulate = true; + set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags); req->r_readdir_cache_idx = cache_ctl.index; } ceph_readdir_cache_release(&cache_ctl); @@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work) mutex_lock(&ci->i_truncate_mutex); - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n", inode, ceph_ino(inode)); mapping_set_error(inode->i_mapping, -EIO); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 7d752d53353a..4c9c72f26eb9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) l.stripe_count = ci->i_layout.stripe_count; l.object_size = ci->i_layout.object_size; l.data_pool = ci->i_layout.pool_id; - l.preferred_osd = (s32)-1; + l.preferred_osd = -1; if (copy_to_user(arg, &l, sizeof(l))) return -EFAULT; } @@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) nl.data_pool = ci->i_layout.pool_id; /* this is obsolete, and always -1 */ - nl.preferred_osd = le64_to_cpu(-1); + nl.preferred_osd = -1; err = __validate_layout(mdsc, &nl); if (err) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c9d2e553a6c4..c681762d76e6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref) ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); iput(req->r_inode); } - if (req->r_locked_dir) - ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); + if (req->r_parent) + ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); @@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + /* Never leave an unregistered request on an unsafe list! */ + list_del_init(&req->r_unsafe_item); + if (req->r_tid == mdsc->oldest_tid) { struct rb_node *p = rb_next(&req->r_node); mdsc->oldest_tid = 0; @@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc, erase_request(&mdsc->request_tree, req); - if (req->r_unsafe_dir && req->r_got_unsafe) { + if (req->r_unsafe_dir && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); } - if (req->r_target_inode && req->r_got_unsafe) { + if (req->r_target_inode && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_target_item); @@ -668,6 +673,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc, } /* + * Walk back up the dentry tree until we hit a dentry representing a + * non-snapshot inode. We do this using the rcu_read_lock (which must be held + * when calling this) to ensure that the objects won't disappear while we're + * working with them. Once we hit a candidate dentry, we attempt to take a + * reference to it, and return that as the result. + */ +static struct inode *get_nonsnap_parent(struct dentry *dentry) +{ + struct inode *inode = NULL; + + while (dentry && !IS_ROOT(dentry)) { + inode = d_inode_rcu(dentry); + if (!inode || ceph_snap(inode) == CEPH_NOSNAP) + break; + dentry = dentry->d_parent; + } + if (inode) + inode = igrab(inode); + return inode; +} + +/* * Choose mds to send request to next. If there is a hint set in the * request (e.g., due to a prior forward hint from the mds), use that. * Otherwise, consult frag tree and/or caps to identify the @@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc, * * Called under mdsc->mutex. */ -static struct dentry *get_nonsnap_parent(struct dentry *dentry) -{ - /* - * we don't need to worry about protecting the d_parent access - * here because we never renaming inside the snapped namespace - * except to resplice to another snapdir, and either the old or new - * result is a valid result. - */ - while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) - dentry = dentry->d_parent; - return dentry; -} - static int __choose_mds(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { @@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, int mode = req->r_direct_mode; int mds = -1; u32 hash = req->r_direct_hash; - bool is_hash = req->r_direct_is_hash; + bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); /* * is there a specific mds we should try? ignore hint if we have @@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode = NULL; if (req->r_inode) { inode = req->r_inode; + ihold(inode); } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ - struct dentry *parent = req->r_dentry->d_parent; - struct inode *dir = d_inode(parent); + struct dentry *parent; + struct inode *dir; + + rcu_read_lock(); + parent = req->r_dentry->d_parent; + dir = req->r_parent ? : d_inode_rcu(parent); - if (dir->i_sb != mdsc->fsc->sb) { - /* not this fs! */ + if (!dir || dir->i_sb != mdsc->fsc->sb) { + /* not this fs or parent went negative */ inode = d_inode(req->r_dentry); + if (inode) + ihold(inode); } else if (ceph_snap(dir) != CEPH_NOSNAP) { /* direct snapped/virtual snapdir requests * based on parent dir inode */ - struct dentry *dn = get_nonsnap_parent(parent); - inode = d_inode(dn); + inode = get_nonsnap_parent(parent); dout("__choose_mds using nonsnap parent %p\n", inode); } else { /* dentry target */ inode = d_inode(req->r_dentry); if (!inode || mode == USE_AUTH_MDS) { /* dir + name */ - inode = dir; + inode = igrab(dir); hash = ceph_dentry_hash(dir, req->r_dentry); is_hash = true; + } else { + ihold(inode); } } + rcu_read_unlock(); } dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, @@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) - return mds; + goto out; } /* since this file/dir wasn't known to be @@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) - return mds; + goto out; } } } @@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); if (!cap) { spin_unlock(&ci->i_ceph_lock); + iput(inode); goto random; } mds = cap->session->s_mds; @@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode, ceph_vinop(inode), mds, cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); +out: + iput(inode); return mds; random: @@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - list_del_init(&req->r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); __unregister_request(mdsc, req); @@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; if (ci->i_wrbuffer_ref > 0 && - ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) invalidate = true; while (!list_empty(&ci->i_cap_flush_list)) { @@ -1775,18 +1800,23 @@ retry: return path; } -static int build_dentry_path(struct dentry *dentry, +static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, int *pfreepath) { char *path; - if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) { - *pino = ceph_ino(d_inode(dentry->d_parent)); + rcu_read_lock(); + if (!dir) + dir = d_inode_rcu(dentry->d_parent); + if (dir && ceph_snap(dir) == CEPH_NOSNAP) { + *pino = ceph_ino(dir); + rcu_read_unlock(); *ppath = dentry->d_name.name; *ppathlen = dentry->d_name.len; return 0; } + rcu_read_unlock(); path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); @@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode, * an explicit ino+path. */ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - const char *rpath, u64 rino, - const char **ppath, int *pathlen, + struct inode *rdiri, const char *rpath, + u64 rino, const char **ppath, int *pathlen, u64 *ino, int *freepath) { int r = 0; @@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); + r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + freepath); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { @@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, int ret; ret = set_request_path_attr(req->r_inode, req->r_dentry, - req->r_path1, req->r_ino1.ino, + req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1); if (ret < 0) { msg = ERR_PTR(ret); @@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, } ret = set_request_path_attr(NULL, req->r_old_dentry, + req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2); if (ret < 0) { @@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_inode_drop, req->r_inode_unless, 0); if (req->r_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_dentry, - mds, req->r_dentry_drop, req->r_dentry_unless); + req->r_parent, mds, req->r_dentry_drop, + req->r_dentry_unless); if (req->r_old_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_old_dentry, - mds, req->r_old_dentry_drop, req->r_old_dentry_unless); + req->r_old_dentry_dir, mds, + req->r_old_dentry_drop, + req->r_old_dentry_unless); if (req->r_old_inode_drop) releases += ceph_encode_inode_release(&p, d_inode(req->r_old_dentry), @@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); - if (req->r_got_unsafe) { + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; /* * Replay. Do not regenerate message (and rebuild @@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, rhead = msg->front.iov_base; rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_REPLAY; - if (req->r_locked_dir) + if (req->r_parent) flags |= CEPH_MDS_FLAG_WANT_DENTRY; rhead->flags = cpu_to_le32(flags); rhead->num_fwd = req->r_num_fwd; rhead->num_retry = req->r_attempts - 1; rhead->ino = 0; - dout(" r_locked_dir = %p\n", req->r_locked_dir); + dout(" r_parent = %p\n", req->r_parent); return 0; } @@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = 0; - if (req->r_err || req->r_got_result) { - if (req->r_aborted) + if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) __unregister_request(mdsc, req); goto out; } @@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc, err = -EIO; goto finish; } - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("do_request forced umount\n"); err = -EIO; goto finish; } - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { if (mdsc->mdsmap_err) { err = mdsc->mdsmap_err; dout("do_request mdsmap err %d\n", err); @@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) continue; if (req->r_attempts > 0) continue; /* only new requests */ @@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, dout("do_request on %p\n", req); - /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */ + /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ if (req->r_inode) ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); - if (req->r_locked_dir) - ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); + if (req->r_parent) + ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); if (req->r_old_dentry_dir) ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), CEPH_CAP_PIN); @@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); /* only abort if we didn't race with a real reply */ - if (req->r_got_result) { + if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { dout("aborted request %lld with %d\n", req->r_tid, err); @@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, */ mutex_lock(&req->r_fill_mutex); req->r_err = err; - req->r_aborted = true; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); - if (req->r_locked_dir && + if (req->r_parent && (req->r_op & CEPH_MDS_OP_WRITE)) ceph_invalidate_dir_request(req); } else { @@ -2323,7 +2358,7 @@ out: */ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { - struct inode *inode = req->r_locked_dir; + struct inode *inode = req->r_parent; dout("invalidate_dir_request %p (complete, lease(s))\n", inode); @@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } /* dup? */ - if ((req->r_got_unsafe && !head->safe) || - (req->r_got_safe && head->safe)) { + if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || + (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { pr_warn("got a dup %s reply on %llu from mds%d\n", head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } - if (req->r_got_safe) { + if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { pr_warn("got unsafe after safe on %llu from mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); @@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { - req->r_got_safe = true; + set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags); __unregister_request(mdsc, req); - if (req->r_got_unsafe) { + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { /* * We already handled the unsafe response, now do the * cleanup. No need to examine the response; the MDS @@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * useful we could do with a revised return value. */ dout("got safe reply %llu, mds%d\n", tid, mds); - list_del_init(&req->r_unsafe_item); /* last unsafe request during umount? */ if (mdsc->stopping && !__get_oldest_req(mdsc)) @@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) goto out; } } else { - req->r_got_unsafe = true; + set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags); list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); if (req->r_unsafe_dir) { struct ceph_inode_info *ci = @@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); current->journal_info = req; - err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); + err = ceph_fill_trace(mdsc->fsc->sb, req); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || req->r_op == CEPH_MDS_OP_LSSNAP)) @@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (realm) ceph_put_snap_realm(mdsc, realm); - if (err == 0 && req->r_got_unsafe && req->r_target_inode) { + if (err == 0 && req->r_target_inode && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); spin_lock(&ci->i_unsafe_lock); list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); @@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } out_err: mutex_lock(&mdsc->mutex); - if (!req->r_aborted) { + if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { if (err) { req->r_err = err; } else { req->r_reply = ceph_msg_get(msg); - req->r_got_result = true; + set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); } } else { dout("reply arrived after request %lld was aborted\n", tid); @@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, goto out; /* dup reply? */ } - if (req->r_aborted) { + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { dout("forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd) { @@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, /* resend. forward race not possible; mds would drop */ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); BUG_ON(req->r_err); - BUG_ON(req->r_got_result); + BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); req->r_attempts = 0; req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; @@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) continue; if (req->r_attempts == 0) continue; /* only old requests */ @@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) */ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) { - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return true; return atomic_read(&mdsc->num_sessions) <= skipped; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3c6f77b7bb02..ac0475a2daa7 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -202,9 +202,18 @@ struct ceph_mds_request { char *r_path1, *r_path2; struct ceph_vino r_ino1, r_ino2; - struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ + struct inode *r_parent; /* parent dir inode */ struct inode *r_target_inode; /* resulting inode */ +#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */ +#define CEPH_MDS_R_ABORTED (2) /* call was aborted */ +#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */ +#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */ +#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ +#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ +#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */ + unsigned long r_req_flags; + struct mutex r_fill_mutex; union ceph_mds_request_args r_args; @@ -216,7 +225,6 @@ struct ceph_mds_request { /* for choosing which mds to send this request to */ int r_direct_mode; u32 r_direct_hash; /* choose dir frag based on this dentry hash */ - bool r_direct_is_hash; /* true if r_direct_hash is valid */ /* data payload is used for xattr ops */ struct ceph_pagelist *r_pagelist; @@ -234,7 +242,6 @@ struct ceph_mds_request { struct ceph_mds_reply_info_parsed r_reply_info; struct page *r_locked_page; int r_err; - bool r_aborted; unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ @@ -262,9 +269,7 @@ struct ceph_mds_request { ceph_mds_request_callback_t r_callback; ceph_mds_request_wait_callback_t r_wait_for_completion; struct list_head r_unsafe_item; /* per-session unsafe list item */ - bool r_got_unsafe, r_got_safe, r_got_result; - bool r_did_prepopulate; long long r_dir_release_cnt; long long r_dir_ordered_cnt; int r_readdir_cache_idx; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6bd20d707bfd..0ec8d0114e57 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = { .destroy_inode = ceph_destroy_inode, .write_inode = ceph_write_inode, .drop_inode = ceph_drop_inode, - .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .show_options = ceph_show_options, @@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + if (fsc->mount_options->rsize > fsc->mount_options->rasize && + fsc->mount_options->rsize >= PAGE_SIZE) + fsc->backing_dev_info.io_pages = + (fsc->mount_options->rsize + PAGE_SIZE - 1) + >> PAGE_SHIFT; + else if (fsc->mount_options->rsize == 0) + fsc->backing_dev_info.io_pages = ULONG_MAX; + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (!err) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3373b61faefd..e9410bcf4113 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -45,8 +45,8 @@ #define ceph_test_mount_opt(fsc, opt) \ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) -#define CEPH_RSIZE_DEFAULT 0 /* max read size */ -#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */ +#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */ +#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */ #define CEPH_MAX_READDIR_DEFAULT 1024 #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" @@ -343,7 +343,6 @@ struct ceph_inode_info { u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ - struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; @@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) } /* what the mds thinks we want */ -extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); +extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern void ceph_caps_init(struct ceph_mds_client *mdsc); extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); @@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops; extern struct inode *ceph_alloc_inode(struct super_block *sb); extern void ceph_destroy_inode(struct inode *inode); extern int ceph_drop_inode(struct inode *inode); -extern void ceph_evict_inode(struct inode *inode); extern struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino); @@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec *ctime, struct timespec *mtime, struct timespec *atime); extern int ceph_fill_trace(struct super_block *sb, - struct ceph_mds_request *req, - struct ceph_mds_session *session); + struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session); @@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force); extern int ceph_encode_dentry_release(void **p, struct dentry *dn, + struct inode *dir, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, @@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -extern void ceph_sync_write_wait(struct inode *inode); + /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 98dc842e7245..aa3debbba826 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3282,7 +3282,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) * sure that it doesn't change while being written back. */ static int -cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; @@ -925,12 +925,11 @@ static int dax_insert_mapping(struct address_space *mapping, /** * dax_pfn_mkwrite - handle first write to DAX page - * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault */ -int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int dax_pfn_mkwrite(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; void *entry, **slot; pgoff_t index = vmf->pgoff; @@ -1119,20 +1118,10 @@ static int dax_fault_return(int error) return VM_FAULT_SIGBUS; } -/** - * dax_iomap_fault - handle a page fault on a DAX file - * @vma: The virtual memory area where the fault occurred - * @vmf: The description of the fault - * @ops: iomap ops passed from the file system - * - * When a page fault occurs, filesystems may call this helper in their fault - * or mkwrite handler for DAX files. Assumes the caller has done all the - * necessary locking for the page fault to proceed successfully. - */ -int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - const struct iomap_ops *ops) +static int dax_iomap_pte_fault(struct vm_fault *vmf, + const struct iomap_ops *ops) { - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; @@ -1205,11 +1194,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, case IOMAP_MAPPED: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_insert_mapping(mapping, iomap.bdev, sector, - PAGE_SIZE, &entry, vma, vmf); + PAGE_SIZE, &entry, vmf->vma, vmf); /* -EBUSY is fine, somebody else faulted on the same PTE */ if (error == -EBUSY) error = 0; @@ -1247,7 +1236,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } return vmf_ret; } -EXPORT_SYMBOL_GPL(dax_iomap_fault); #ifdef CONFIG_FS_DAX_PMD /* @@ -1338,7 +1326,8 @@ fallback: return VM_FAULT_FALLBACK; } -int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops) +static int dax_iomap_pmd_fault(struct vm_fault *vmf, + const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; @@ -1446,5 +1435,34 @@ out: trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result); return result; } -EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); +#else +static int dax_iomap_pmd_fault(struct vm_fault *vmf, + const struct iomap_ops *ops) +{ + return VM_FAULT_FALLBACK; +} #endif /* CONFIG_FS_DAX_PMD */ + +/** + * dax_iomap_fault - handle a page fault on a DAX file + * @vmf: The description of the fault + * @ops: iomap ops passed from the file system + * + * When a page fault occurs, filesystems may call this helper in + * their fault handler for DAX files. dax_iomap_fault() assumes the caller + * has done all the necessary locking for page fault to proceed + * successfully. + */ +int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + const struct iomap_ops *ops) +{ + switch (pe_size) { + case PE_SIZE_PTE: + return dax_iomap_pte_fault(vmf, ops); + case PE_SIZE_PMD: + return dax_iomap_pmd_fault(vmf, ops); + default: + return VM_FAULT_FALLBACK; + } +} +EXPORT_SYMBOL_GPL(dax_iomap_fault); diff --git a/fs/direct-io.c b/fs/direct-io.c index c87bae4376b8..a04ebea77de8 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -587,7 +587,7 @@ static int dio_set_defer_completion(struct dio *dio) /* * Call into the fs to map some more disk blocks. We record the current number * of available blocks at sdio->blocks_available. These are in units of the - * fs blocksize, (1 << inode->i_blkbits). + * fs blocksize, i_blocksize(inode). * * The fs is allowed to map lots of blocks at once. If it wants to do that, * it uses the passed inode-relative block number as the file offset, as usual. diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 866bb18efefe..e00d45af84ea 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -123,7 +123,7 @@ void ecryptfs_destroy_kthread(void) * @lower_dentry: Lower dentry for file to open * @lower_mnt: Lower vfsmount for file to open * - * This function gets a r/w file opened againt the lower dentry. + * This function gets a r/w file opened against the lower dentry. * * Returns zero on success; non-zero otherwise */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index bcb68fcc8445..5ec16313da1a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1895,7 +1895,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. * Also, we do not currently supported nested exclusive wakeups. */ - if (epds.events & EPOLLEXCLUSIVE) { + if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) { if (op == EPOLL_CTL_MOD) goto error_tgt_fput; if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) || diff --git a/fs/ext2/file.c b/fs/ext2/file.c index b0f241528a30..b21891a6bfca 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -87,19 +87,19 @@ out_unlock: * The default page_lock and i_size verification done by non-DAX fault paths * is sufficient because ext2 doesn't support hole punching. */ -static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ext2_dax_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); int ret; if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } down_read(&ei->dax_sem); - ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) @@ -107,16 +107,15 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; } -static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); loff_t size; int ret; sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); down_read(&ei->dax_sem); /* check that the faulting page hasn't raced with truncate */ @@ -124,7 +123,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); up_read(&ei->dax_sem); sb_end_pagefault(inode->i_sb); @@ -134,7 +133,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, /* - * .pmd_fault is not supported for DAX because allocation in ext2 + * .huge_fault is not supported for DAX because allocation in ext2 * cannot be reliably aligned to huge page sizes and so pmd faults * will always fail and fail back to regular faults. */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cee23b684f47..2fd17e8e4984 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2483,8 +2483,8 @@ extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); -extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int ext4_page_mkwrite(struct vm_fault *vmf); +extern int ext4_filemap_fault(struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int ext4_get_projid(struct inode *inode, kprojid_t *projid); extern void ext4_da_update_reserve_space(struct inode *inode, diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 37e059202cd2..e7f12a204cbc 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -84,7 +84,7 @@ * -- writeout * Writeout looks up whole page cache to see if a buffer is * mapped, If there are not very many delayed buffers, then it is - * time comsuming. + * time consuming. * * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA, * bigalloc and writeout can figure out if a block or a range of diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 13021a054fc0..8210c1f43556 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -253,19 +253,20 @@ out: } #ifdef CONFIG_FS_DAX -static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ext4_dax_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) { int result; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); + result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); up_read(&EXT4_I(inode)->i_mmap_sem); if (write) sb_end_pagefault(sb); @@ -273,25 +274,9 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return result; } -static int -ext4_dax_pmd_fault(struct vm_fault *vmf) +static int ext4_dax_fault(struct vm_fault *vmf) { - int result; - struct inode *inode = file_inode(vmf->vma->vm_file); - struct super_block *sb = inode->i_sb; - bool write = vmf->flags & FAULT_FLAG_WRITE; - - if (write) { - sb_start_pagefault(sb); - file_update_time(vmf->vma->vm_file); - } - down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops); - up_read(&EXT4_I(inode)->i_mmap_sem); - if (write) - sb_end_pagefault(sb); - - return result; + return ext4_dax_huge_fault(vmf, PE_SIZE_PTE); } /* @@ -303,22 +288,21 @@ ext4_dax_pmd_fault(struct vm_fault *vmf) * wp_pfn_shared() fails. Thus fault gets retried and things work out as * desired. */ -static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; loff_t size; int ret; sb_start_pagefault(sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); @@ -327,7 +311,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, - .pmd_fault = ext4_dax_pmd_fault, + .huge_fault = ext4_dax_huge_fault, .page_mkwrite = ext4_dax_fault, .pfn_mkwrite = ext4_dax_pfn_mkwrite, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 75212a6e69f8..971f66342080 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2221,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, { struct inode *inode = mpd->inode; int err; - ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) + ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) >> inode->i_blkbits; do { @@ -3577,7 +3577,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) if (overwrite) get_block_func = ext4_dio_get_block_overwrite; else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || - round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { + round_down(offset, i_blocksize(inode)) >= inode->i_size) { get_block_func = ext4_dio_get_block; dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; } else if (is_sync_kiocb(iocb)) { @@ -5179,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * do. We do the check mainly to optimize the common PAGE_SIZE == * blocksize case */ - if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) + if (offset > PAGE_SIZE - i_blocksize(inode)) return; while (1) { page = find_lock_page(inode->i_mapping, @@ -5821,8 +5821,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; loff_t size; unsigned long len; @@ -5912,13 +5913,13 @@ out: return ret; } -int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_filemap_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int err; down_read(&EXT4_I(inode)->i_mmap_sem); - err = filemap_fault(vma, vmf); + err = filemap_fault(vmf); up_read(&EXT4_I(inode)->i_mmap_sem); return err; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 10c62de642c6..354dc1a894c2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -838,7 +838,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) inode = page->mapping->host; sb = inode->i_sb; ngroups = ext4_get_groups_count(sb); - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); blocks_per_page = PAGE_SIZE / blocksize; groups_per_page = blocks_per_page >> 1; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 6fc14def0c70..578f8c33fb44 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) if (PageUptodate(page)) return 0; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 49f10dce817d..1edc86e874e3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -32,11 +32,10 @@ #include "trace.h" #include <trace/events/f2fs.h> -static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; int err; @@ -58,7 +57,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a..e80bfd06daf5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2043,12 +2043,12 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int fuse_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping) { unlock_page(page); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 016c11eaca7c..6fe2a59c6a9a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -379,10 +379,10 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int gfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; @@ -399,7 +399,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out; - gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE); + gfs2_size_hint(vmf->vma->vm_file, pos, PAGE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -407,7 +407,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_uninit; /* Update file times before taking page lock */ - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index a3ec3ae7d347..482081bcdf70 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb, /* default values */ *start = 0; - *size = sb->s_bdev->bd_inode->i_size >> 9; + *size = i_size_read(sb->s_bdev->bd_inode) >> 9; if (HFS_SB(sb)->session >= 0) { te.cdte_track = HFS_SB(sb)->session; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index ebb85e5f6549..e254fa0f0697 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb, /* default values */ *start = 0; - *size = sb->s_bdev->bd_inode->i_size >> 9; + *size = i_size_read(sb->s_bdev->bd_inode) >> 9; if (HFSPLUS_SB(sb)->session >= 0) { te.cdte_track = HFSPLUS_SB(sb)->session; diff --git a/fs/iomap.c b/fs/iomap.c index d89f70bbb952..0f85f2410605 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -420,8 +420,8 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops) { - unsigned blocksize = (1 << inode->i_blkbits); - unsigned off = pos & (blocksize - 1); + unsigned int blocksize = i_blocksize(inode); + unsigned int off = pos & (blocksize - 1); /* Block boundary? Nothing to do */ if (!off) @@ -445,11 +445,10 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, return length; } -int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - const struct iomap_ops *ops) +int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); unsigned long length; loff_t offset, size; ssize_t ret; @@ -736,9 +735,9 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap) { struct iomap_dio *dio = data; - unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); - unsigned fs_block_size = (1 << inode->i_blkbits), pad; - unsigned align = iov_iter_alignment(dio->submit.iter); + unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); + unsigned int fs_block_size = i_blocksize(inode), pad; + unsigned int align = iov_iter_alignment(dio->submit.iter); struct iov_iter iter; struct bio *bio; bool need_zeroout = false; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 2be7c9ce6663..c64c2574a0aa 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, sb->s_blocksize - offset : toread; tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; + tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 0); if (err) return err; @@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; + tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 1); if (err) goto out; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 439b946c4808..db5900aaa55a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -478,7 +478,7 @@ static void kernfs_drain(struct kernfs_node *kn) rwsem_release(&kn->dep_map, 1, _RET_IP_); } - kernfs_unmap_bin_file(kn); + kernfs_drain_open_files(kn); mutex_lock(&kernfs_mutex); } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 78219d5644e9..35043a8c4529 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -348,9 +348,9 @@ static void kernfs_vma_open(struct vm_area_struct *vma) kernfs_put_active(of->kn); } -static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int kernfs_vma_fault(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; @@ -362,16 +362,15 @@ static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) - ret = of->vm_ops->fault(vma, vmf); + ret = of->vm_ops->fault(vmf); kernfs_put_active(of->kn); return ret; } -static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int kernfs_vma_page_mkwrite(struct vm_fault *vmf) { - struct file *file = vma->vm_file; + struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; @@ -383,7 +382,7 @@ static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, ret = 0; if (of->vm_ops->page_mkwrite) - ret = of->vm_ops->page_mkwrite(vma, vmf); + ret = of->vm_ops->page_mkwrite(vmf); else file_update_time(file); @@ -516,7 +515,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) goto out_put; rc = 0; - of->mmapped = 1; + of->mmapped = true; of->vm_ops = vma->vm_ops; vma->vm_ops = &kernfs_vm_ops; out_put: @@ -708,7 +707,8 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) if (error) goto err_free; - ((struct seq_file *)file->private_data)->private = of; + of->seq_file = file->private_data; + of->seq_file->private = of; /* seq_file clears PWRITE unconditionally, restore it if WRITE */ if (file->f_mode & FMODE_WRITE) @@ -717,13 +717,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) /* make sure we have open node struct */ error = kernfs_get_open_node(kn, of); if (error) - goto err_close; + goto err_seq_release; + + if (ops->open) { + /* nobody has access to @of yet, skip @of->mutex */ + error = ops->open(of); + if (error) + goto err_put_node; + } /* open succeeded, put active references */ kernfs_put_active(kn); return 0; -err_close: +err_put_node: + kernfs_put_open_node(kn, of); +err_seq_release: seq_release(inode, file); err_free: kfree(of->prealloc_buf); @@ -733,11 +742,41 @@ err_out: return error; } +/* used from release/drain to ensure that ->release() is called exactly once */ +static void kernfs_release_file(struct kernfs_node *kn, + struct kernfs_open_file *of) +{ + /* + * @of is guaranteed to have no other file operations in flight and + * we just want to synchronize release and drain paths. + * @kernfs_open_file_mutex is enough. @of->mutex can't be used + * here because drain path may be called from places which can + * cause circular dependency. + */ + lockdep_assert_held(&kernfs_open_file_mutex); + + if (!of->released) { + /* + * A file is never detached without being released and we + * need to be able to release files which are deactivated + * and being drained. Don't use kernfs_ops(). + */ + kn->attr.ops->release(of); + of->released = true; + } +} + static int kernfs_fop_release(struct inode *inode, struct file *filp) { struct kernfs_node *kn = filp->f_path.dentry->d_fsdata; struct kernfs_open_file *of = kernfs_of(filp); + if (kn->flags & KERNFS_HAS_RELEASE) { + mutex_lock(&kernfs_open_file_mutex); + kernfs_release_file(kn, of); + mutex_unlock(&kernfs_open_file_mutex); + } + kernfs_put_open_node(kn, of); seq_release(inode, filp); kfree(of->prealloc_buf); @@ -746,12 +785,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp) return 0; } -void kernfs_unmap_bin_file(struct kernfs_node *kn) +void kernfs_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; struct kernfs_open_file *of; - if (!(kn->flags & KERNFS_HAS_MMAP)) + if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) return; spin_lock_irq(&kernfs_open_node_lock); @@ -763,10 +802,16 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn) return; mutex_lock(&kernfs_open_file_mutex); + list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); - unmap_mapping_range(inode->i_mapping, 0, 0, 1); + + if (kn->flags & KERNFS_HAS_MMAP) + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + + kernfs_release_file(kn, of); } + mutex_unlock(&kernfs_open_file_mutex); kernfs_put_open_node(kn, NULL); @@ -965,6 +1010,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, kn->flags |= KERNFS_HAS_SEQ_SHOW; if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; + if (ops->release) + kn->flags |= KERNFS_HAS_RELEASE; rc = kernfs_add_one(kn); if (rc) { diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index bfd551bbf231..3100987cf8ba 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -104,7 +104,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, */ extern const struct file_operations kernfs_file_fops; -void kernfs_unmap_bin_file(struct kernfs_node *kn); +void kernfs_drain_open_files(struct kernfs_node *kn); /* * symlink.c diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1c13dd80744f..7e4ea3b9f472 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; + if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin6); } diff --git a/fs/mpage.c b/fs/mpage.c index 28af984a3d96..baff8f820c29 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -115,7 +115,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) SetPageUptodate(page); return; } - create_empty_buffers(page, 1 << inode->i_blkbits, 0); + create_empty_buffers(page, i_blocksize(inode), 0); } head = page_buffers(page); page_bh = head; diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 39f57bef8531..0c3905e0542e 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -27,10 +27,9 @@ * XXX: how are we excluding truncate/invalidate here? Maybe need to lock * page? */ -static int ncp_file_mmap_fault(struct vm_area_struct *area, - struct vm_fault *vmf) +static int ncp_file_mmap_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(area->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); char *pg_addr; unsigned int already_read; unsigned int count; @@ -90,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, * -- nyc */ count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT); + mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); return VM_FAULT_MAJOR; } diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index f32f272ee501..97b111d79489 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -525,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len) return result; } if (result > len) { - pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len); + pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len); return -EIO; } return result; @@ -619,7 +619,7 @@ skipdata:; goto skipdata2; } if (datalen > req->datalen + 8) { - pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8); + pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8); server->rcv.state = 3; goto skipdata; } diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 2905479f214a..0ca370d23ddb 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -381,7 +381,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) struct blk_plug plug; int i; - dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + dprintk("%s enter, %zu@%lld\n", __func__, count, offset); /* At this point, header->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index eb094c6011d8..fd0284c1dc32 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, + .vs_need_cong_ctrl = true, }; struct svc_version nfs4_callback_version4 = { @@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, + .vs_need_cong_ctrl = true, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 26dbe8b0c10d..668213984d68 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -528,10 +528,10 @@ const struct address_space_operations nfs_file_aops = { * writable, implying that someone is about to modify the page through a * shared-writable mapping */ -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int nfs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct file *filp = vma->vm_file; + struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a3fc48ba4931..18f98e08544d 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -482,7 +482,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) u32 j, idx; struct nfs_fh *fh; - dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, hdr->args.pgbase, (size_t)hdr->args.count, offset); @@ -540,7 +540,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) if (IS_ERR(ds_clnt)) return PNFS_NOT_ATTEMPTED; - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", + dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0ca4af8cca5d..d6acc688df7e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1751,7 +1751,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) int vers; struct nfs_fh *fh; - dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, hdr->args.pgbase, (size_t)hdr->args.count, offset); @@ -1828,7 +1828,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) vers = nfs4_ff_layout_ds_version(lseg, idx); - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", + dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 2a4cdce939a0..8f3d2acb81c3 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -291,7 +291,7 @@ objlayout_read_pagelist(struct nfs_pgio_header *hdr) &hdr->args.pgbase, hdr->args.offset, hdr->args.count); - dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", + dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n", __func__, inode->i_ino, offset, count, hdr->res.eof); err = objio_read_pagelist(hdr); diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index a06115e31612..92b4b41d19d2 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -24,7 +24,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, { struct nfsd4_layout_seg *seg = &args->lg_seg; struct super_block *sb = inode->i_sb; - u32 block_size = (1 << inode->i_blkbits); + u32 block_size = i_blocksize(inode); struct pnfs_block_extent *bex; struct iomap iomap; u32 device_generation = 0; @@ -181,7 +181,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, int nr_iomaps; nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + lcp->lc_up_len, &iomaps, i_blocksize(inode)); if (nr_iomaps < 0) return nfserrno(nr_iomaps); @@ -375,7 +375,7 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, int nr_iomaps; nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + lcp->lc_up_len, &iomaps, i_blocksize(inode)); if (nr_iomaps < 0) return nfserrno(nr_iomaps); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 43e109cc0ccc..e71f11b1a180 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1102,6 +1102,7 @@ static struct flags { { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, { NFSEXP_V4ROOT, {"v4root", ""}}, { NFSEXP_PNFS, {"pnfs", ""}}, + { NFSEXP_SECURITY_LABEL, {"security_label", ""}}, { 0, {"", ""}} }; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index d08cd88155c7..838f90f3f890 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = { .vs_proc = nfsd_acl_procedures2, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 0c890347cde3..dcb5f79076c0 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = { .vs_proc = nfsd_acl_procedures3, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d818e4ffd79f..045c9081eabe 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nfserr = nfsd_write(rqstp, &resp->fh, NULL, - argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, - &resp->committed); + nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, argp->vlen, + &cnt, resp->committed); resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index eb78109d666c..0274db6e65d0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, length + 4); if (unlikely(p == NULL)) goto out_overflow; + p += XDR_QUADLEN(length); hdr->nops = be32_to_cpup(p); return 0; out_overflow: @@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, struct nfsd4_callback *cb) { struct nfsd4_session *session = cb->cb_clp->cl_cb_session; - struct nfs4_sessionid id; - int status; + int status = -ESERVERFAULT; __be32 *p; u32 dummy; - status = -ESERVERFAULT; - /* * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. @@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4); if (unlikely(p == NULL)) goto out_overflow; - memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, session->se_sessionid.data, - NFS4_MAX_SESSIONID_LEN) != 0) { + + if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("NFS: %s Invalid session id\n", __func__); goto out; } @@ -753,6 +750,14 @@ int set_callback_cred(void) return 0; } +void cleanup_callback_cred(void) +{ + if (callback_cred) { + put_rpccred(callback_cred); + callback_cred = NULL; + } +} + static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b20577dcdd2..6b9b6cca469f 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, { __be32 status; u32 id = -1; + + if (name == NULL || namelen == 0) + return nfserr_inval; + status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); *uid = make_kuid(&init_user_ns, id); if (!uid_valid(*uid)) @@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, { __be32 status; u32 id = -1; + + if (name == NULL || namelen == 0) + return nfserr_inval; + status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); *gid = make_kgid(&init_user_ns, id); if (!gid_valid(*gid)) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 74a6e573e061..cbeeda1e94a2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, u32 *bmval, u32 *writable) { struct dentry *dentry = cstate->current_fh.fh_dentry; + struct svc_export *exp = cstate->current_fh.fh_export; if (!nfsd_attrs_supported(cstate->minorversion, bmval)) return nfserr_attrnotsupp; if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry))) return nfserr_attrnotsupp; + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) && + !(exp->ex_flags & NFSEXP_SECURITY_LABEL)) + return nfserr_attrnotsupp; if (writable && !bmval_is_subset(bmval, writable)) return nfserr_inval; if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) && @@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - &write->wr_how_written); + write->wr_how_written); fput(filp); write->wr_bytes_written = cnt; @@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } +static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + /* ac_supported, ac_resp_access */ + return (op_encode_hdr_size + 2)* sizeof(__be32); +} + static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); @@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, return ret; } +static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o XDR_QUADLEN(rlen)) * sizeof(__be32); } +static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; +} + static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } +static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) + * sizeof(__be32); +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * + (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); +} + static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * @@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) } #ifdef CONFIG_NFSD_PNFS +static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); + + return (op_encode_hdr_size + + 1 /* gd_layout_type*/ + + XDR_QUADLEN(rlen) + + 2 /* gd_notify_types */) * sizeof(__be32); +} + /* * At this stage we don't really know what layout driver will handle the request, * so we need to define an arbitrary upper bound here. @@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_ } #endif /* CONFIG_NFSD_PNFS */ + +static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 3) * sizeof(__be32); +} + static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, .op_name = "OP_ACCESS", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize, }, [OP_CLOSE] = { .op_func = (nfsd4op_func)nfsd4_close, @@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_GETFH] = { .op_func = (nfsd4op_func)nfsd4_getfh, .op_name = "OP_GETFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize, }, [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, @@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, .op_name = "OP_LOCKT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, }, [OP_LOCKU] = { .op_func = (nfsd4op_func)nfsd4_locku, @@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_lookup, .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUP", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUPP", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_NVERIFY] = { .op_func = (nfsd4op_func)nfsd4_nverify, .op_name = "OP_NVERIFY", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, @@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_READLINK] = { .op_func = (nfsd4op_func)nfsd4_readlink, .op_name = "OP_READLINK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize, }, [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, @@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_secinfo, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize, }, [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, @@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, .op_name = "OP_VERIFY", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, @@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize, }, [OP_TEST_STATEID] = { .op_func = (nfsd4op_func)nfsd4_test_stateid, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_TEST_STATEID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize, }, [OP_FREE_STATEID] = { .op_func = (nfsd4op_func)nfsd4_free_stateid, @@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_getdeviceinfo, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_GETDEVICEINFO", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize, }, [OP_LAYOUTGET] = { .op_func = (nfsd4op_func)nfsd4_layoutget, @@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_SEEK] = { .op_func = (nfsd4op_func)nfsd4_seek, .op_name = "OP_SEEK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize, }, }; @@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) { - struct nfsd4_operation *opdesc; - nfsd4op_rsize estimator; - if (op->opnum == OP_ILLEGAL) return op_encode_hdr_size * sizeof(__be32); - opdesc = OPDESC(op); - estimator = opdesc->op_rsize_bop; - return estimator ? estimator(rqstp, op) : PAGE_SIZE; + + BUG_ON(OPDESC(op)->op_rsize_bop == NULL); + return OPDESC(op)->op_rsize_bop(rqstp, op); } void warn_on_nonidempotent_op(struct nfsd4_op *op) @@ -2476,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = { }; struct svc_version nfsd_version4 = { - .vs_vers = 4, - .vs_nproc = 2, - .vs_proc = nfsd_procedures4, - .vs_dispatch = nfsd_dispatch, - .vs_xdrsize = NFS4_SVC_XDRSIZE, - .vs_rpcb_optnl = 1, + .vs_vers = 4, + .vs_nproc = 2, + .vs_proc = nfsd_procedures4, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = true, + .vs_need_cong_ctrl = true, }; /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a0dee8ae9f97..e9ef50addddb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; - dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + dprintk("NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); @@ -7012,23 +7012,24 @@ nfs4_state_start(void) ret = set_callback_cred(); if (ret) - return -ENOMEM; + return ret; + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; - goto out_recovery; + goto out_cleanup_cred; } ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; set_max_delegations(); - return 0; out_free_laundry: destroy_workqueue(laundry_wq); -out_recovery: +out_cleanup_cred: + cleanup_callback_cred(); return ret; } @@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void) { destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); + cleanup_callback_cred(); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8fae53ce21d1..382c1fd05b4c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -58,7 +58,7 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR -u32 nfsd_suppattrs[3][3] = { +const u32 nfsd_suppattrs[3][3] = { {NFSD4_SUPPORTED_ATTRS_WORD0, NFSD4_SUPPORTED_ATTRS_WORD1, NFSD4_SUPPORTED_ATTRS_WORD2}, @@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) READ_BUF(16); p = xdr_decode_hyper(p, &write->wr_offset); write->wr_stable_how = be32_to_cpup(p++); - if (write->wr_stable_how > 2) + if (write->wr_stable_how > NFS_FILE_SYNC) goto xdr_error; write->wr_buflen = be32_to_cpup(p++); @@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } else max_reply += nfsd4_max_reply(argp->rqstp, op); /* - * OP_LOCK may return a conflicting lock. (Special case - * because it will just skip encoding this if it runs - * out of xdr buffer space, and it is the only operation - * that behaves this way.) + * OP_LOCK and OP_LOCKT may return a conflicting lock. + * (Special case because it will just skip encoding this + * if it runs out of xdr buffer space, and it is the only + * operation that behaves this way.) */ - if (op->opnum == OP_LOCK) + if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT) max_reply += NFS4_OPAQUE_LIMIT; if (op->status) { @@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, + struct svc_export *exp) { - if (IS_I_VERSION(inode)) { + if (exp->ex_flags & NFSEXP_V4ROOT) { + *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); + *p++ = 0; + } else if (IS_I_VERSION(inode)) { p = xdr_encode_hyper(p, inode->i_version); } else { *p++ = cpu_to_be32(stat->ctime.tv_sec); @@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - err = security_inode_getsecctx(d_inode(dentry), + if (exp->ex_flags & NFSEXP_SECURITY_LABEL) + err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); + else + err = -EOPNOTSUPP; contextsupport = (err == 0); if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) @@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - p = encode_change(p, &stat, d_inode(dentry)); + p = encode_change(p, &stat, d_inode(dentry), exp); } if (bmval0 & FATTR4_WORD0_SIZE) { p = xdr_reserve_space(xdr, 8); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index d6b97b424ad1..96fd15979cbd 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -578,7 +578,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) struct kvec *vec = &rqstp->rq_res.head[0]; if (vec->iov_len + data->iov_len > PAGE_SIZE) { - printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n", + printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n", data->iov_len); return 0; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f3b2f34b10a3..73e75ac90525 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -536,6 +536,19 @@ out_free: return rv; } +static ssize_t +nfsd_print_version_support(char *buf, int remaining, const char *sep, + unsigned vers, unsigned minor) +{ + const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u"; + bool supported = !!nfsd_vers(vers, NFSD_TEST); + + if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST)) + supported = false; + return snprintf(buf, remaining, format, sep, + supported ? '+' : '-', vers, minor); +} + static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; @@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) len = qword_get(&mesg, vers, size); if (len <= 0) return -EINVAL; do { + enum vers_op cmd; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); @@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (*minorp == '.') { if (num != 4) return -EINVAL; - minor = simple_strtoul(minorp+1, NULL, 0); - if (minor == 0) - return -EINVAL; - if (nfsd_minorversion(minor, sign == '-' ? - NFSD_CLEAR : NFSD_SET) < 0) + if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; - goto next; - } + } else + minor = 0; + cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { case 2: case 3: - case 4: - nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET); + nfsd_vers(num, cmd); break; + case 4: + if (nfsd_minorversion(minor, cmd) >= 0) + break; default: return -EINVAL; } - next: vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as @@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; - for (num=2 ; num <= 4 ; num++) - if (nfsd_vers(num, NFSD_AVAIL)) { - len = snprintf(buf, remaining, "%s%c%d", sep, - nfsd_vers(num, NFSD_TEST)?'+':'-', - num); - sep = " "; - - if (len >= remaining) - break; - remaining -= len; - buf += len; - tlen += len; - } - if (nfsd_vers(4, NFSD_AVAIL)) - for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; - minor++) { - len = snprintf(buf, remaining, " %c4.%u", - (nfsd_vers(4, NFSD_TEST) && - nfsd_minorversion(minor, NFSD_TEST)) ? - '+' : '-', - minor); - + for (num=2 ; num <= 4 ; num++) { + if (!nfsd_vers(num, NFSD_AVAIL)) + continue; + minor = 0; + do { + len = nfsd_print_version_support(buf, remaining, + sep, num, minor); if (len >= remaining) - break; + goto out; remaining -= len; buf += len; tlen += len; - } - + minor++; + sep = " "; + } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); + } +out: len = snprintf(buf, remaining, "\n"); if (len >= remaining) return -EINVAL; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index d74c8c44dc35..d96606801d47 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void); FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS) -extern u32 nfsd_suppattrs[3][3]; +extern const u32 nfsd_suppattrs[3][3]; -static inline bool bmval_is_subset(u32 *bm1, u32 *bm2) +static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2) { return !((bm1[0] & ~bm2[0]) || (bm1[1] & ~bm2[1]) || (bm1[2] & ~bm2[2])); } -static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval) +static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) { return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]); } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 010aff5c5a79..fa82b7707e85 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, struct nfsd_attrstat *resp) { __be32 nfserr; - int stable = 1; unsigned long cnt = argp->len; dprintk("nfsd: WRITE %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, - argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, - &stable); + nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, + rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC); return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e6bfd96734c0..efd66da99201 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change) return 0; } +static void +nfsd_adjust_nfsd_versions4(void) +{ + unsigned i; + + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) { + if (nfsd_supported_minorversions[i]) + return; + } + nfsd_vers(4, NFSD_CLEAR); +} + int nfsd_minorversion(u32 minorversion, enum vers_op change) { if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) @@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) switch(change) { case NFSD_SET: nfsd_supported_minorversions[minorversion] = true; + nfsd_vers(4, NFSD_SET); break; case NFSD_CLEAR: nfsd_supported_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(); break; case NFSD_TEST: return nfsd_supported_minorversions[minorversion]; @@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; + if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4516e8b7d776..005c911b34ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); +extern void cleanup_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 26c6fdb4bf67..19d50f600e8d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; + bool size_change = (iap->ia_valid & ATTR_SIZE); if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out; } dentry = fhp->fh_dentry; @@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call to ->setattr, and do the rest as a separate + * setattr call. */ - if (iap->ia_valid & ATTR_SIZE) { + if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) - goto out; - size_change = 1; + return err; + } + fh_lock(fhp); + if (size_change) { /* * RFC5661, Section 18.30.4: * Changing the size of a file with SETATTR indirectly @@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * * (and similar for the older RFCs) */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; - } + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; - iap->ia_valid |= ATTR_CTIME; + host_err = notify_change(dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; + iap->ia_valid &= ~ATTR_SIZE; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + goto out_unlock; } - fh_lock(fhp); + iap->ia_valid |= ATTR_CTIME; host_err = notify_change(dentry, iap, NULL); - fh_unlock(fhp); - err = nfserrno(host_err); -out_put_write_access: +out_unlock: + fh_unlock(fhp); if (size_change) put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); out: - return err; + if (!host_err) + host_err = commit_metadata(fhp); + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) @@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file) __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int *stablep) + unsigned long *cnt, int stable) { struct svc_export *exp; - struct inode *inode; mm_segment_t oldfs; __be32 err = 0; int host_err; - int stable = *stablep; int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; @@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, */ current->flags |= PF_LESS_THROTTLE; - inode = file_inode(file); - exp = fhp->fh_export; - + exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) - stable = 0; + stable = NFS_UNSTABLE; if (stable && !use_wgather) flags |= RWF_SYNC; @@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int *stablep) +nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + struct kvec *vec, int vlen, unsigned long *cnt, int stable) { - __be32 err = 0; + struct file *file = NULL; + __be32 err = 0; trace_write_start(rqstp, fhp, offset, vlen); - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - trace_write_opened(rqstp, fhp, offset, vlen); - err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, - stablep); - trace_write_io_done(rqstp, fhp, offset, vlen); - } else { - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); - if (err) - goto out; + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); + if (err) + goto out; - trace_write_opened(rqstp, fhp, offset, vlen); - if (cnt) - err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, - cnt, stablep); - trace_write_io_done(rqstp, fhp, offset, vlen); - fput(file); - } + trace_write_opened(rqstp, fhp, offset, vlen); + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); + trace_write_io_done(rqstp, fhp, offset, vlen); + fput(file); out: trace_write_done(rqstp, fhp, offset, vlen); return err; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 0bf9e7bf5800..db98c48c735a 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, - loff_t, struct kvec *,int, unsigned long *, int *); +__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, + struct kvec *, int, unsigned long *, int); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int *stablep); + int stable); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 2c90e285d7c6..03b8ba933eb2 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -34,7 +34,7 @@ static inline unsigned long nilfs_palloc_groups_per_desc_block(const struct inode *inode) { - return (1UL << inode->i_blkbits) / + return i_blocksize(inode) / sizeof(struct nilfs_palloc_group_desc); } diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index d5c23da43513..c21e0b4454a6 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -50,7 +50,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) brelse(bh); BUG(); } - memset(bh->b_data, 0, 1 << inode->i_blkbits); + memset(bh->b_data, 0, i_blocksize(inode)); bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 2e315f9f2e51..06ffa135dfa6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -119,7 +119,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { - return 1 << btree->b_inode->i_blkbits; + return i_blocksize(btree->b_inode); } static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) @@ -1870,7 +1870,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( - 1 << btree->b_inode->i_blkbits)) { + nilfs_btree_node_size(btree))) { di = &dreq; ni = &nreq; } else { diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 547381f3ce13..c5fa3dee72fc 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -51,8 +51,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return err; } -static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int nilfs_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index c7f4fef9ebf5..7ffe71a8dfb9 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -51,7 +51,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_add_bytes(inode, (1 << inode->i_blkbits) * n); + inode_add_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_add(n, &root->blocks_count); } @@ -60,7 +60,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); + inode_sub_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_sub(n, &root->blocks_count); } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index d56d3a5bea88..98835ed6bef4 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -57,7 +57,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); kaddr = kmap_atomic(bh->b_page); - memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); + memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); @@ -501,7 +501,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size, struct nilfs_mdt_info *mi = NILFS_MDT(inode); mi->mi_entry_size = entry_size; - mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; + mi->mi_entries_per_block = i_blocksize(inode) / entry_size; mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bedcae2c28e6..7d18d62e8e07 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -723,7 +723,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, lock_page(page); if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); + create_empty_buffers(page, i_blocksize(inode), 0); unlock_page(page); bh = head = page_buffers(page); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 11556b7d93ec..88a31e9340a0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -608,7 +608,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, int ret = 0; struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; unsigned int block_end, block_start; - unsigned int bsize = 1 << inode->i_blkbits; + unsigned int bsize = i_blocksize(inode); if (!page_has_buffers(page)) create_empty_buffers(page, bsize, 0); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 7025d8c27999..3e04279446e8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2924,7 +2924,7 @@ again: /* * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for * another try; otherwise, we are sure the MIGRATING state is there, - * drop the unneded state which blocked threads trying to DIRTY + * drop the unneeded state which blocked threads trying to DIRTY */ spin_lock(&res->spinlock); BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 7b6a146327d7..8836305eb378 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* We know that zero_from is block aligned */ for (block_start = zero_from; block_start < zero_to; block_start = block_end) { - block_end = block_start + (1 << inode->i_blkbits); + block_end = block_start + i_blocksize(inode); /* * block_start is block-aligned. Bump it by one to force diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 429088786e93..098f5c712569 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -44,17 +44,18 @@ #include "ocfs2_trace.h" -static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) +static int ocfs2_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; sigset_t oldset; int ret; ocfs2_block_signals(&oldset); - ret = filemap_fault(area, vmf); + ret = filemap_fault(vmf); ocfs2_unblock_signals(&oldset); - trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno, - area, vmf->page, vmf->pgoff); + trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno, + vma, vmf->page, vmf->pgoff); return ret; } @@ -127,10 +128,10 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ocfs2_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; int ret; @@ -160,7 +161,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); + ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page); up_write(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b0ced669427e..c4ab6fdf17a0 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -400,8 +400,9 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, /* remove the op from the in progress hash table */ op = orangefs_devreq_remove_op(head.tag); if (!op) { - gossip_err("WARNING: No one's waiting for tag %llu\n", - llu(head.tag)); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: No one's waiting for tag %llu\n", + __func__, llu(head.tag)); return ret; } diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 551bc74ed2b8..5cd617980fbf 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -136,12 +136,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, return -EINVAL; } -struct backing_dev_info orangefs_backing_dev_info = { - .name = "orangefs", - .ra_pages = 0, - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, -}; - /** ORANGEFS2 implementation of address space operations */ const struct address_space_operations orangefs_address_operations = { .readpage = orangefs_readpage, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 75375e90a63f..6333cbbdfef7 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -344,6 +344,11 @@ int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc) user_desc->size, user_desc->count); + if (user_desc->total_size < 0 || + user_desc->size < 0 || + user_desc->count < 0) + goto out; + /* * sanity check alignment and size of buffer that caller wants to * work with diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 27e75cf28b3a..791912da97d7 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -967,13 +967,13 @@ int orangefs_debugfs_new_client_string(void __user *arg) int ret; ret = copy_from_user(&client_debug_array_string, - (void __user *)arg, - ORANGEFS_MAX_DEBUG_STRING_LEN); + (void __user *)arg, + ORANGEFS_MAX_DEBUG_STRING_LEN); if (ret != 0) { pr_info("%s: CLIENT_STRING: copy_from_user failed\n", __func__); - return -EIO; + return -EFAULT; } /* @@ -988,17 +988,18 @@ int orangefs_debugfs_new_client_string(void __user *arg) */ client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] = '\0'; - + pr_info("%s: client debug array string has been received.\n", __func__); if (!help_string_initialized) { /* Build a proper debug help string. */ - if (orangefs_prepare_debugfs_help_string(0)) { + ret = orangefs_prepare_debugfs_help_string(0); + if (ret) { gossip_err("%s: no debug help string \n", __func__); - return -EIO; + return ret; } } @@ -1011,7 +1012,7 @@ int orangefs_debugfs_new_client_string(void __user *arg) help_string_initialized++; - return ret; + return 0; } int orangefs_debugfs_new_debug(void __user *arg) diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h index a3d84ffee905..f380f9ed1b28 100644 --- a/fs/orangefs/orangefs-dev-proto.h +++ b/fs/orangefs/orangefs-dev-proto.h @@ -50,8 +50,7 @@ * Misc constants. Please retain them as multiples of 8! * Otherwise 32-64 bit interactions will be messed up :) */ -#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400 -#define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800 +#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800 /* * The maximum number of directory entries in a single request is 96. diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 3bf803d732c5..70355a9a2596 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -529,7 +529,6 @@ extern spinlock_t orangefs_htable_ops_in_progress_lock; extern int hash_table_size; extern const struct address_space_operations orangefs_address_operations; -extern struct backing_dev_info orangefs_backing_dev_info; extern const struct inode_operations orangefs_file_inode_operations; extern const struct file_operations orangefs_file_operations; extern const struct inode_operations orangefs_symlink_inode_operations; diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 4113eb0495bf..c1b5174cb5a9 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -80,11 +80,6 @@ static int __init orangefs_init(void) int ret = -1; __u32 i = 0; - ret = bdi_init(&orangefs_backing_dev_info); - - if (ret) - return ret; - if (op_timeout_secs < 0) op_timeout_secs = 0; @@ -94,7 +89,7 @@ static int __init orangefs_init(void) /* initialize global book keeping data structures */ ret = op_cache_initialize(); if (ret < 0) - goto err; + goto out; ret = orangefs_inode_cache_initialize(); if (ret < 0) @@ -181,9 +176,6 @@ cleanup_inode: cleanup_op: op_cache_finalize(); -err: - bdi_destroy(&orangefs_backing_dev_info); - out: return ret; } @@ -207,8 +199,6 @@ static void __exit orangefs_exit(void) kfree(orangefs_htable_ops_in_progress); - bdi_destroy(&orangefs_backing_dev_info); - pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION); } diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 084954448f18..afd2f523b283 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -91,6 +91,13 @@ * Description: * Readahead cache buffer count and size. * + * What: /sys/fs/orangefs/readahead_readcnt + * Date: Jan 2017 + * Contact: Martin Brandenburg <martin@omnibond.com> + * Description: + * Number of buffers (in multiples of readahead_size) + * which can be read ahead for a single file at once. + * * What: /sys/fs/orangefs/acache/... * Date: Jun 2015 * Contact: Martin Brandenburg <martin@omnibond.com> @@ -329,7 +336,8 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj, if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) && (!strcmp(attr->attr.name, "readahead_count") || !strcmp(attr->attr.name, "readahead_size") || - !strcmp(attr->attr.name, "readahead_count_size"))) { + !strcmp(attr->attr.name, "readahead_count_size") || + !strcmp(attr->attr.name, "readahead_readcnt"))) { rc = -EINVAL; goto out; } @@ -360,6 +368,11 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj, "readahead_count_size")) new_op->upcall.req.param.op = ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE; + + else if (!strcmp(attr->attr.name, + "readahead_readcnt")) + new_op->upcall.req.param.op = + ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT; } else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) { if (!strcmp(attr->attr.name, "timeout_msecs")) new_op->upcall.req.param.op = @@ -542,7 +555,8 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj, if (!(orangefs_features & ORANGEFS_FEATURE_READAHEAD) && (!strcmp(attr->attr.name, "readahead_count") || !strcmp(attr->attr.name, "readahead_size") || - !strcmp(attr->attr.name, "readahead_count_size"))) { + !strcmp(attr->attr.name, "readahead_count_size") || + !strcmp(attr->attr.name, "readahead_readcnt"))) { rc = -EINVAL; goto out; } @@ -609,6 +623,15 @@ static ssize_t sysfs_service_op_store(struct kobject *kobj, new_op->upcall.req.param.u.value32[0] = val1; new_op->upcall.req.param.u.value32[1] = val2; goto value_set; + } else if (!strcmp(attr->attr.name, + "readahead_readcnt")) { + if ((val >= 0)) { + new_op->upcall.req.param.op = + ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT; + } else { + rc = 0; + goto out; + } } } else if (!strcmp(kobj->name, ACACHE_KOBJ_ID)) { @@ -812,6 +835,10 @@ static struct orangefs_attribute readahead_count_size_attribute = __ATTR(readahead_count_size, 0664, sysfs_service_op_show, sysfs_service_op_store); +static struct orangefs_attribute readahead_readcnt_attribute = + __ATTR(readahead_readcnt, 0664, sysfs_service_op_show, + sysfs_service_op_store); + static struct orangefs_attribute perf_counter_reset_attribute = __ATTR(perf_counter_reset, 0664, @@ -838,6 +865,7 @@ static struct attribute *orangefs_default_attrs[] = { &readahead_count_attribute.attr, &readahead_size_attribute.attr, &readahead_count_size_attribute.attr, + &readahead_readcnt_attribute.attr, &perf_counter_reset_attribute.attr, &perf_history_size_attribute.attr, &perf_time_interval_secs_attribute.attr, diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 06af81f71e10..9b96b99539d6 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -306,7 +306,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) break; case S_IFDIR: inode->i_size = PAGE_SIZE; - orangefs_inode->blksize = (1 << inode->i_blkbits); + orangefs_inode->blksize = i_blocksize(inode); spin_lock(&inode->i_lock); inode_set_bytes(inode, inode->i_size); spin_unlock(&inode->i_lock); @@ -316,7 +316,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) if (new) { inode->i_size = (loff_t)strlen(new_op-> downcall.resp.getattr.link_target); - orangefs_inode->blksize = (1 << inode->i_blkbits); + orangefs_inode->blksize = i_blocksize(inode); ret = strscpy(orangefs_inode->link_target, new_op->downcall.resp.getattr.link_target, ORANGEFS_NAME_MAX); diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h index af0b0e36d559..b8249f8fdd80 100644 --- a/fs/orangefs/upcall.h +++ b/fs/orangefs/upcall.h @@ -182,6 +182,7 @@ enum orangefs_param_request_op { ORANGEFS_PARAM_REQUEST_OP_READAHEAD_SIZE = 26, ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT = 27, ORANGEFS_PARAM_REQUEST_OP_READAHEAD_COUNT_SIZE = 28, + ORANGEFS_PARAM_REQUEST_OP_READAHEAD_READCNT = 29, }; struct orangefs_param_request_s { diff --git a/fs/proc/base.c b/fs/proc/base.c index b73b4de8fb36..1e1e182d571b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -292,101 +292,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, } } else { /* - * Command line (1 string) occupies ARGV and maybe - * extends into ENVP. - */ - if (len1 + len2 <= *pos) - goto skip_argv_envp; - if (len1 <= *pos) - goto skip_argv; - - p = arg_start + *pos; - len = len1 - *pos; - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* - * Command line can be shorter than whole ARGV - * even if last "marker" byte says it is not. - */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; - } -skip_argv: - /* * Command line (1 string) occupies ARGV and * extends into ENVP. */ - if (len1 <= *pos) { - p = env_start + *pos - len1; - len = len1 + len2 - *pos; - } else { - p = env_start; - len = len2; + struct { + unsigned long p; + unsigned long len; + } cmdline[2] = { + { .p = arg_start, .len = len1 }, + { .p = env_start, .len = len2 }, + }; + loff_t pos1 = *pos; + unsigned int i; + + i = 0; + while (i < 2 && pos1 >= cmdline[i].len) { + pos1 -= cmdline[i].len; + i++; } - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* Find EOS. */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; + while (i < 2) { + p = cmdline[i].p + pos1; + len = cmdline[i].len - pos1; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; } - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; + /* Only first chunk can be read partially. */ + pos1 = 0; + i++; } -skip_argv_envp: - ; } out_free_page: @@ -729,11 +697,11 @@ static int proc_pid_permission(struct inode *inode, int mask) task = get_proc_task(inode); if (!task) return -ESRCH; - has_perms = has_pid_permissions(pid, task, 1); + has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { - if (pid->hide_pid == 2) { + if (pid->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process @@ -798,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); + mmgrab(mm); /* but do not pin its memory */ mmput(mm); } @@ -845,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ @@ -953,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, return -ENOMEM; ret = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; down_read(&mm->mmap_sem); @@ -1096,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) if (p) { if (atomic_read(&p->mm->mm_users) > 1) { mm = p->mm; - atomic_inc(&mm->mm_count); + mmgrab(mm); } task_unlock(p); } @@ -1769,7 +1737,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { - if (!has_pid_permissions(pid, task, 2)) { + if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, @@ -3200,7 +3168,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) int len; cond_resched(); - if (!has_pid_permissions(ns, iter.task, 2)) + if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%d", iter.tgid); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f6a01f09f79d..06c73904d497 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct rb_node *node = dir->subdir.rb_node; while (node) { - struct proc_dir_entry *de = container_of(node, - struct proc_dir_entry, - subdir_node); + struct proc_dir_entry *de = rb_entry(node, + struct proc_dir_entry, + subdir_node); int result = proc_match(len, name, de); if (result < 0) @@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, /* Figure out where to put new node */ while (*new) { - struct proc_dir_entry *this = - container_of(*new, struct proc_dir_entry, subdir_node); + struct proc_dir_entry *this = rb_entry(*new, + struct proc_dir_entry, + subdir_node); int result = proc_match(de->namelen, de->name, this); parent = *new; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ad9ed7958af..2cc7a8030275 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -107,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); - if (pid->hide_pid != 0) + if (pid->hide_pid != HIDEPID_OFF) seq_printf(seq, ",hidepid=%u", pid->hide_pid); return 0; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0b80ad87b4d6..ea9f3d1ae830 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff; phdr->p_vaddr = (size_t)m->addr; - phdr->p_paddr = 0; + if (m->type == KCORE_RAM || m->type == KCORE_TEXT) + phdr->p_paddr = __pa(m->addr); + else + phdr->p_paddr = (elf_addr_t)-1; phdr->p_filesz = phdr->p_memsz = m->size; phdr->p_align = PAGE_SIZE; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 1988440b2049..b90da888b81a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -58,7 +58,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid) case Opt_hidepid: if (match_int(&args[0], &option)) return 0; - if (option < 0 || option > 2) { + if (option < HIDEPID_OFF || + option > HIDEPID_INVISIBLE) { pr_err("proc: hidepid value must be between 0 and 2.\n"); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8f96a49178d0..ee3efb229ef6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) return NULL; down_read(&mm->mmap_sem); @@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long end_vaddr; int ret = 0, copied = 0; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) goto out; ret = -EINVAL; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 37175621e890..1ef97cfcf422 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + if (!mm || !mmget_not_zero(mm)) return NULL; down_read(&mm->mmap_sem); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 5105b1599981..885d445afa0d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, * On s390 the fault handler is used for memory regions that can't be mapped * directly with remap_pfn_range(). */ -static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int mmap_vmcore_fault(struct vm_fault *vmf) { #ifdef CONFIG_S390 - struct address_space *mapping = vma->vm_file->f_mapping; + struct address_space *mapping = vmf->vma->vm_file->f_mapping; pgoff_t index = vmf->pgoff; struct page *page; loff_t offset; @@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma, } return 0; fail: - do_munmap(vma->vm_mm, from, len); + do_munmap(vma->vm_mm, from, len, NULL); return -EAGAIN; } @@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) return 0; fail: - do_munmap(vma->vm_mm, vma->vm_start, len); + do_munmap(vma->vm_mm, vma->vm_start, len, NULL); return -EAGAIN; } #else diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 729677e18e36..efab7b64925b 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen) { int ret; - ret = lz4_compress(in, inlen, out, &outlen, workspace); - if (ret) { - pr_err("lz4_compress error, ret = %d!\n", ret); + ret = LZ4_compress_default(in, out, inlen, outlen, workspace); + if (!ret) { + pr_err("LZ4_compress_default error; compression failed!\n"); return -EIO; } - return outlen; + return ret; } static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen) { int ret; - ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen); - if (ret) { - pr_err("lz4_decompress error, ret = %d!\n", ret); + ret = LZ4_decompress_safe(in, out, inlen, outlen); + if (ret < 0) { + /* + * LZ4_decompress_safe will return an error code + * (< 0) if decompression failed + */ + pr_err("LZ4_decompress_safe error, ret = %d!\n", ret); return -EIO; } - return outlen; + return ret; } static void allocate_lz4(void) { - big_oops_buf_sz = lz4_compressbound(psinfo->bufsize); + big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize); big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); if (big_oops_buf) { workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 2f8c5c9bdaf6..b396eb09f288 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, int ret = 0; th.t_trans_id = 0; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); if (logit) { reiserfs_write_lock(s); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index cfeae9b0a2b7..a6ab9d64ea1b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -525,7 +525,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, * referenced in convert_tail_for_hole() that may be called from * reiserfs_get_block() */ - bh_result->b_size = (1 << inode->i_blkbits); + bh_result->b_size = i_blocksize(inode); ret = reiserfs_get_block(inode, iblock, bh_result, create | GET_BLOCK_NO_DANGLE); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e314cb30a181..feabcde0290d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s, if (!strcmp(arg, "auto")) { /* From JFS code, to auto-get the size. */ *blocks = - s->s_bdev->bd_inode->i_size >> s-> + i_size_read(s->s_bdev->bd_inode) >> s-> s_blocksize_bits; } else { *blocks = simple_strtoul(arg, &p, 0); diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index ff4468bd18b0..95da65366548 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct squashfs_lz4 *stream = strm; void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t dest_len = output->length; for (i = 0; i < b; i++) { avail = min(bytes, msblk->devblksize - offset); @@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[i]); } - res = lz4_decompress_unknownoutputsize(stream->input, length, - stream->output, &dest_len); - if (res) + res = LZ4_decompress_safe(stream->input, stream->output, + length, output->length); + + if (res < 0) return -EIO; - bytes = dest_len; + bytes = res; data = squashfs_first_page(output); buff = stream->output; while (data) { @@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, } squashfs_finish_page(output); - return dest_len; + return res; } const struct squashfs_decompressor squashfs_lz4_comp_ops = { diff --git a/fs/stat.c b/fs/stat.c index a268b7f27adf..3f14d1ef0868 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->blksize = (1 << inode->i_blkbits); + stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b0d783774c96..d9ae86f96df7 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1506,11 +1506,10 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made writable. * UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) +static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8ec6b3df0bc7..a8d8f71ef8bd 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1193,7 +1193,7 @@ int udf_setsize(struct inode *inode, loff_t newsize) { int err; struct udf_inode_info *iinfo; - int bsize = 1 << inode->i_blkbits; + int bsize = i_blocksize(inode); if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 18406158e13f..3c421d06a18e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx { struct list_head list; }; +struct userfaultfd_unmap_ctx { + struct userfaultfd_ctx *ctx; + unsigned long start; + unsigned long end; + struct list_head list; +}; + struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_t wq; @@ -681,16 +688,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void madvise_userfault_dontneed(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end) +void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue ewq; ctx = vma->vm_userfaultfd_ctx.ctx; - if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED)) + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) return; userfaultfd_ctx_get(ctx); @@ -700,15 +707,101 @@ void madvise_userfault_dontneed(struct vm_area_struct *vma, msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_MADVDONTNEED; - ewq.msg.arg.madv_dn.start = start; - ewq.msg.arg.madv_dn.end = end; + ewq.msg.event = UFFD_EVENT_REMOVE; + ewq.msg.arg.remove.start = start; + ewq.msg.arg.remove.end = end; userfaultfd_event_wait_completion(ctx, &ewq); down_read(&mm->mmap_sem); } +static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, + unsigned long start, unsigned long end) +{ + struct userfaultfd_unmap_ctx *unmap_ctx; + + list_for_each_entry(unmap_ctx, unmaps, list) + if (unmap_ctx->ctx == ctx && unmap_ctx->start == start && + unmap_ctx->end == end) + return true; + + return false; +} + +int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *unmaps) +{ + for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { + struct userfaultfd_unmap_ctx *unmap_ctx; + struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || + has_unmap_ctx(ctx, unmaps, start, end)) + continue; + + unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); + if (!unmap_ctx) + return -ENOMEM; + + userfaultfd_ctx_get(ctx); + unmap_ctx->ctx = ctx; + unmap_ctx->start = start; + unmap_ctx->end = end; + list_add_tail(&unmap_ctx->list, unmaps); + } + + return 0; +} + +void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) +{ + struct userfaultfd_unmap_ctx *ctx, *n; + struct userfaultfd_wait_queue ewq; + + list_for_each_entry_safe(ctx, n, uf, list) { + msg_init(&ewq.msg); + + ewq.msg.event = UFFD_EVENT_UNMAP; + ewq.msg.arg.remove.start = ctx->start; + ewq.msg.arg.remove.end = ctx->end; + + userfaultfd_event_wait_completion(ctx->ctx, &ewq); + + list_del(&ctx->list); + kfree(ctx); + } +} + +void userfaultfd_exit(struct mm_struct *mm) +{ + struct vm_area_struct *vma = mm->mmap; + + /* + * We can do the vma walk without locking because the caller + * (exit_mm) knows it now has exclusive access + */ + while (vma) { + struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + + if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { + struct userfaultfd_wait_queue ewq; + + userfaultfd_ctx_get(ctx); + + msg_init(&ewq.msg); + ewq.msg.event = UFFD_EVENT_EXIT; + + userfaultfd_event_wait_completion(ctx, &ewq); + + ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; + } + + vma = vma->vm_next; + } +} + static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; @@ -1514,6 +1607,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src, uffdio_copy.len); mmput(ctx->mm); + } else { + return -ENOSPC; } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; @@ -1712,17 +1807,17 @@ static void init_once_userfaultfd_ctx(void *mem) } /** - * userfaultfd_file_create - Creates an userfaultfd file pointer. + * userfaultfd_file_create - Creates a userfaultfd file pointer. * @flags: Flags for the userfaultfd file. * - * This function creates an userfaultfd file pointer, w/out installing + * This function creates a userfaultfd file pointer, w/out installing * it into the fd table. This is useful when the userfaultfd file is * used during the initialization of data structures that require * extra setup after the userfaultfd creation. So the userfaultfd * creation is split into the file pointer creation phase, and the * file descriptor installation phase. In this way races with * userspace closing the newly installed file descriptor can be - * avoided. Returns an userfaultfd file pointer, or a proper error + * avoided. Returns a userfaultfd file pointer, or a proper error * pointer. */ static struct file *userfaultfd_file_create(int flags) @@ -1752,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags) ctx->released = false; ctx->mm = current->mm; /* prevent the mm struct to be freed */ - atomic_inc(&ctx->mm->mm_count); + mmgrab(ctx->mm); file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1ff9df7a3ce8..bf65a9ea8642 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -103,9 +103,9 @@ xfs_finish_page_writeback( unsigned int bsize; ASSERT(bvec->bv_offset < PAGE_SIZE); - ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0); + ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); ASSERT(end < PAGE_SIZE); - ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0); + ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); bh = head = page_buffers(bvec->bv_page); @@ -349,7 +349,7 @@ xfs_map_blocks( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; + ssize_t count = i_blocksize(inode); xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int bmapi_flags = XFS_BMAPI_ENTIRE; @@ -758,7 +758,7 @@ xfs_aops_discard_page( break; } next_buffer: - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while ((bh = bh->b_this_page) != head); @@ -846,7 +846,7 @@ xfs_writepage_map( LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; - ssize_t len = 1 << inode->i_blkbits; + ssize_t len = i_blocksize(inode); int error = 0; int count = 0; int uptodate = 1; @@ -1210,7 +1210,7 @@ xfs_map_trim_size( offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, - 1 << inode->i_blkbits); + i_blocksize(inode)); } if (mapping_size > LONG_MAX) mapping_size = LONG_MAX; @@ -1241,7 +1241,7 @@ xfs_get_blocks( return -EIO; offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + ASSERT(bh_result->b_size >= i_blocksize(inode)); size = bh_result->b_size; if (offset >= i_size_read(inode)) @@ -1389,7 +1389,7 @@ xfs_vm_set_page_dirty( if (offset < end_offset) set_buffer_dirty(bh); bh = bh->b_this_page; - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while (bh != head); } /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 022014016d80..35703a801372 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -754,7 +754,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; @@ -776,7 +776,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_INSERT_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; new_size = i_size_read(inode) + len; if (offset & blksize_mask || len & blksize_mask) { @@ -1379,22 +1379,21 @@ xfs_file_llseek( */ STATIC int xfs_filemap_page_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_page_mkwrite(XFS_I(inode)); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); } else { - ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); } @@ -1406,23 +1405,22 @@ xfs_filemap_page_mkwrite( STATIC int xfs_filemap_fault( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) - return xfs_filemap_page_mkwrite(vma, vmf); + return xfs_filemap_page_mkwrite(vmf); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); else - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; @@ -1431,13 +1429,14 @@ xfs_filemap_fault( /* * Similar to xfs_filemap_fault(), the DAX fault path can call into here on * both read and write faults. Hence we need to handle both cases. There is no - * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * ->huge_mkwrite callout for huge pages, so we have a single function here to * handle both cases here. @flags carries the information on the type of fault * occuring. */ STATIC int -xfs_filemap_pmd_fault( - struct vm_fault *vmf) +xfs_filemap_huge_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size) { struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); @@ -1446,7 +1445,7 @@ xfs_filemap_pmd_fault( if (!IS_DAX(inode)) return VM_FAULT_FALLBACK; - trace_xfs_filemap_pmd_fault(ip); + trace_xfs_filemap_huge_fault(ip); if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); @@ -1454,7 +1453,7 @@ xfs_filemap_pmd_fault( } xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (vmf->flags & FAULT_FLAG_WRITE) @@ -1471,11 +1470,10 @@ xfs_filemap_pmd_fault( */ static int xfs_filemap_pfn_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret = VM_FAULT_NOPAGE; loff_t size; @@ -1483,7 +1481,7 @@ xfs_filemap_pfn_mkwrite( trace_xfs_filemap_pfn_mkwrite(ip); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); /* check if the faulting page hasn't raced with truncate */ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); @@ -1491,7 +1489,7 @@ xfs_filemap_pfn_mkwrite( if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else if (IS_DAX(inode)) - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); return ret; @@ -1500,7 +1498,7 @@ xfs_filemap_pfn_mkwrite( static const struct vm_operations_struct xfs_file_vm_ops = { .fault = xfs_filemap_fault, - .pmd_fault = xfs_filemap_pmd_fault, + .huge_fault = xfs_filemap_huge_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, .pfn_mkwrite = xfs_filemap_pfn_mkwrite, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index fb7555e73a62..383ac227ce2c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -687,7 +687,7 @@ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); -DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); +DEFINE_INODE_EVENT(xfs_filemap_huge_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); |