diff options
Diffstat (limited to 'fs/xfs')
34 files changed, 383 insertions, 416 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b56ff451adce..5b6fcb9b44e2 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2805,7 +2805,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_MEMALLOC_NOFS; + unsigned long new_pflags = 0; /* * we are in a transaction context here, but may also be doing work @@ -2817,12 +2817,20 @@ xfs_btree_split_worker( new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; current_set_flags_nested(&pflags, new_pflags); + xfs_trans_set_context(args->cur->bc_tp); args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->key, args->curp, args->stat); - complete(args->done); + xfs_trans_clear_context(args->cur->bc_tp); current_restore_flags_nested(&pflags, new_pflags); + + /* + * Do not access args after complete() has run here. We don't own args + * and the owner may run and free args before we return here. + */ + complete(args->done); + } /* diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 6fad140d4c8e..6bf7d8b7d743 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -770,8 +770,6 @@ struct xfs_scrub_metadata { /* * ioctl commands that are used by Linux filesystems */ -#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS -#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS #define XFS_IOC_GETVERSION FS_IOC_GETVERSION /* @@ -782,8 +780,6 @@ struct xfs_scrub_metadata { #define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) #define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) #define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) -#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR #define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) #define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) #define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index f88694f22d05..813b5f219113 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -63,8 +63,8 @@ xbitmap_init( static int xbitmap_range_cmp( void *priv, - struct list_head *a, - struct list_head *b) + const struct list_head *a, + const struct list_head *b) { struct xbitmap_range *ap; struct xbitmap_range *bp; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 779cb73b3d00..d02bef24b32b 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -238,7 +238,8 @@ xfs_acl_set_mode( } int -xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type) { umode_t mode; bool set_mode = false; @@ -252,7 +253,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_update_mode(inode, &mode, &acl); + error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); if (error) return error; set_mode = true; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index c042c0868016..7bdb3a4ed798 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -11,7 +11,8 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); -extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); #else diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4304c6416fbb..b4186d666157 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc( * We hand off the transaction to the completion thread now, so * clear the flag here. */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_clear_context(tp); return 0; } @@ -125,7 +125,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_set_context(tp); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); /* we abort the update if there was an IO error */ @@ -568,6 +568,12 @@ xfs_vm_writepage( { struct xfs_writepage_ctx wpc = { }; + if (WARN_ON_ONCE(current->journal_info)) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); } @@ -578,6 +584,13 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; + /* + * Writing back data in a transaction context can result in recursive + * transactions. This is bad, so issue a warning and get out of here. + */ + if (WARN_ON_ONCE(current->journal_info)) + return 0; + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index e2148f2d5d6b..17f36db2f792 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -6,7 +6,7 @@ static inline unsigned int bio_max_vecs(unsigned int count) { - return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); + return bio_max_segs(howmany(count, PAGE_SIZE)); } int diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 2344757ede63..e3a691937e92 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -265,8 +265,8 @@ xfs_trans_log_finish_bmap_update( static int xfs_bmap_update_diff_items( void *priv, - struct list_head *a, - struct list_head *b) + const struct list_head *a, + const struct list_head *b) { struct xfs_bmap_intent *ba; struct xfs_bmap_intent *bb; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f6e5235df7c9..592800c8852f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1480,7 +1480,7 @@ xfs_buf_ioapply_map( int op) { int page_index; - int total_nr_pages = bp->b_page_count; + unsigned int total_nr_pages = bp->b_page_count; int nr_pages; struct bio *bio; sector_t sector = bp->b_maps[map].bm_bn; @@ -1505,7 +1505,7 @@ xfs_buf_ioapply_map( next_chunk: atomic_inc(&bp->b_io_remaining); - nr_pages = min(total_nr_pages, BIO_MAX_PAGES); + nr_pages = bio_max_segs(total_nr_pages); bio = bio_alloc(GFP_NOIO, nr_pages); bio_set_dev(bio, bp->b_target->bt_bdev); @@ -2124,9 +2124,9 @@ xfs_buf_delwri_queue( */ static int xfs_buf_cmp( - void *priv, - struct list_head *a, - struct list_head *b) + void *priv, + const struct list_head *a, + const struct list_head *b) { struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 3991e59cfd18..a4075685d9eb 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -344,7 +344,6 @@ xfs_extent_busy_trim( ASSERT(*len > 0); spin_lock(&args->pag->pagb_lock); -restart: fbno = *bno; flen = *len; rbp = args->pag->pagb_tree.rb_node; @@ -363,19 +362,6 @@ restart: continue; } - /* - * If this is a metadata allocation, try to reuse the busy - * extent instead of trimming the allocation. - */ - if (!(args->datatype & XFS_ALLOC_USERDATA) && - !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) { - if (!xfs_extent_busy_update_extent(args->mp, args->pag, - busyp, fbno, flen, - false)) - goto restart; - continue; - } - if (bbno <= fbno) { /* start overlap */ @@ -643,8 +629,8 @@ xfs_extent_busy_wait_all( int xfs_extent_busy_ag_cmp( void *priv, - struct list_head *l1, - struct list_head *l2) + const struct list_head *l1, + const struct list_head *l2) { struct xfs_extent_busy *b1 = container_of(l1, struct xfs_extent_busy, list); diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 990ab3891971..8aea07100092 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -58,7 +58,8 @@ void xfs_extent_busy_wait_all(struct xfs_mount *mp); int -xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); +xfs_extent_busy_ag_cmp(void *priv, const struct list_head *a, + const struct list_head *b); static inline void xfs_extent_busy_sort(struct list_head *list) { diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 93223ebb3372..2424230ca2c3 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -397,8 +397,8 @@ xfs_trans_free_extent( static int xfs_extent_free_diff_items( void *priv, - struct list_head *a, - struct list_head *b) + const struct list_head *a, + const struct list_head *b) { struct xfs_mount *mp = priv; struct xfs_extent_free_item *ra; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 68ca1b40d8c7..a007ca0711d9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -29,6 +29,7 @@ #include <linux/backing-dev.h> #include <linux/mman.h> #include <linux/fadvise.h> +#include <linux/mount.h> static const struct vm_operations_struct xfs_file_vm_ops; @@ -1051,7 +1052,8 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_vn_setattr_size(file_dentry(file), &iattr); + error = xfs_vn_setattr_size(file_mnt_user_ns(file), + file_dentry(file), &iattr); if (error) goto out_unlock; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 636ac13b1df2..2a8bdf33e6c4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -766,6 +766,7 @@ xfs_inode_inherit_flags2( */ static int xfs_init_new_inode( + struct user_namespace *mnt_userns, struct xfs_trans *tp, struct xfs_inode *pip, xfs_ino_t ino, @@ -811,11 +812,11 @@ xfs_init_new_inode( if (dir && !(dir->i_mode & S_ISGID) && (mp->m_flags & XFS_MOUNT_GRPID)) { - inode->i_uid = current_fsuid(); + inode_fsuid_set(inode, mnt_userns); inode->i_gid = dir->i_gid; inode->i_mode = mode; } else { - inode_init_owner(inode, dir, mode); + inode_init_owner(mnt_userns, inode, dir, mode); } /* @@ -824,7 +825,8 @@ xfs_init_new_inode( * (and only if the irix_sgid_inherit compatibility variable is set). */ if (irix_sgid_inherit && - (inode->i_mode & S_ISGID) && !in_group_p(inode->i_gid)) + (inode->i_mode & S_ISGID) && + !in_group_p(i_gid_into_mnt(mnt_userns, inode))) inode->i_mode &= ~S_ISGID; ip->i_d.di_size = 0; @@ -901,6 +903,7 @@ xfs_init_new_inode( */ int xfs_dir_ialloc( + struct user_namespace *mnt_userns, struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode, @@ -933,7 +936,8 @@ xfs_dir_ialloc( return error; ASSERT(ino != NULLFSINO); - return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp); + return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev, + prid, ipp); } /* @@ -973,6 +977,7 @@ xfs_bumplink( int xfs_create( + struct user_namespace *mnt_userns, xfs_inode_t *dp, struct xfs_name *name, umode_t mode, @@ -1002,9 +1007,10 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), + mapped_fsgid(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; @@ -1046,7 +1052,8 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip); + error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev, + prid, &ip); if (error) goto out_trans_cancel; @@ -1127,6 +1134,7 @@ xfs_create( int xfs_create_tmpfile( + struct user_namespace *mnt_userns, struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp) @@ -1150,9 +1158,10 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), + mapped_fsgid(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; @@ -1164,7 +1173,7 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dir_ialloc(&tp, dp, mode, 0, 0, prid, &ip); + error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid, &ip); if (error) goto out_trans_cancel; @@ -2977,13 +2986,15 @@ out_trans_abort: */ static int xfs_rename_alloc_whiteout( + struct user_namespace *mnt_userns, struct xfs_inode *dp, struct xfs_inode **wip) { struct xfs_inode *tmpfile; int error; - error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile); + error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE, + &tmpfile); if (error) return error; @@ -3005,6 +3016,7 @@ xfs_rename_alloc_whiteout( */ int xfs_rename( + struct user_namespace *mnt_userns, struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, @@ -3036,7 +3048,7 @@ xfs_rename( */ if (flags & RENAME_WHITEOUT) { ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); - error = xfs_rename_alloc_whiteout(target_dp, &wip); + error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index eca333f5f715..88ee4c3930ae 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -369,15 +369,18 @@ int xfs_release(struct xfs_inode *ip); void xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); -int xfs_create(struct xfs_inode *dp, struct xfs_name *name, +int xfs_create(struct user_namespace *mnt_userns, + struct xfs_inode *dp, struct xfs_name *name, umode_t mode, dev_t rdev, struct xfs_inode **ipp); -int xfs_create_tmpfile(struct xfs_inode *dp, umode_t mode, +int xfs_create_tmpfile(struct user_namespace *mnt_userns, + struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); -int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, +int xfs_rename(struct user_namespace *mnt_userns, + struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip, unsigned int flags); @@ -407,9 +410,10 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode, - xfs_nlink_t nlink, dev_t dev, prid_t prid, - struct xfs_inode **ipp); +int xfs_dir_ialloc(struct user_namespace *mnt_userns, + struct xfs_trans **tpp, struct xfs_inode *dp, + umode_t mode, xfs_nlink_t nlink, dev_t dev, + prid_t prid, struct xfs_inode **ipp); static inline int xfs_itruncate_extents( diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 248083ea0276..bbda105a2ce5 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -40,6 +40,7 @@ #include <linux/mount.h> #include <linux/namei.h> +#include <linux/fileattr.h> /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -693,7 +694,8 @@ xfs_ioc_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = bf->l_start; - error = xfs_vn_setattr_size(file_dentry(filp), &iattr); + error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp), + &iattr); if (error) goto out_unlock; @@ -734,13 +736,15 @@ xfs_fsinumbers_fmt( STATIC int xfs_ioc_fsbulkstat( - xfs_mount_t *mp, + struct file *file, unsigned int cmd, void __user *arg) { + struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, + .mnt_userns = file_mnt_user_ns(file), .ocount = 0, }; xfs_ino_t lastino; @@ -908,13 +912,15 @@ xfs_bulk_ireq_teardown( /* Handle the v5 bulkstat ioctl. */ STATIC int xfs_ioc_bulkstat( - struct xfs_mount *mp, + struct file *file, unsigned int cmd, struct xfs_bulkstat_req __user *arg) { + struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; struct xfs_bulk_ireq hdr; struct xfs_ibulk breq = { .mp = mp, + .mnt_userns = file_mnt_user_ns(file), }; int error; @@ -1048,73 +1054,15 @@ xfs_ioc_ag_geometry( * Linux extended inode flags interface. */ -STATIC unsigned int -xfs_merge_ioc_xflags( - unsigned int flags, - unsigned int start) -{ - unsigned int xflags = start; - - if (flags & FS_IMMUTABLE_FL) - xflags |= FS_XFLAG_IMMUTABLE; - else - xflags &= ~FS_XFLAG_IMMUTABLE; - if (flags & FS_APPEND_FL) - xflags |= FS_XFLAG_APPEND; - else - xflags &= ~FS_XFLAG_APPEND; - if (flags & FS_SYNC_FL) - xflags |= FS_XFLAG_SYNC; - else - xflags &= ~FS_XFLAG_SYNC; - if (flags & FS_NOATIME_FL) - xflags |= FS_XFLAG_NOATIME; - else - xflags &= ~FS_XFLAG_NOATIME; - if (flags & FS_NODUMP_FL) - xflags |= FS_XFLAG_NODUMP; - else - xflags &= ~FS_XFLAG_NODUMP; - if (flags & FS_DAX_FL) - xflags |= FS_XFLAG_DAX; - else - xflags &= ~FS_XFLAG_DAX; - - return xflags; -} - -STATIC unsigned int -xfs_di2lxflags( - uint16_t di_flags, - uint64_t di_flags2) -{ - unsigned int flags = 0; - - if (di_flags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (di_flags & XFS_DIFLAG_APPEND) - flags |= FS_APPEND_FL; - if (di_flags & XFS_DIFLAG_SYNC) - flags |= FS_SYNC_FL; - if (di_flags & XFS_DIFLAG_NOATIME) - flags |= FS_NOATIME_FL; - if (di_flags & XFS_DIFLAG_NODUMP) - flags |= FS_NODUMP_FL; - if (di_flags2 & XFS_DIFLAG2_DAX) { - flags |= FS_DAX_FL; - } - return flags; -} - static void xfs_fill_fsxattr( struct xfs_inode *ip, - bool attr, - struct fsxattr *fa) + int whichfork, + struct fileattr *fa) { - struct xfs_ifork *ifp = attr ? ip->i_afp : &ip->i_df; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); + fileattr_fill_xflags(fa, xfs_ip2xflags(ip)); fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; fa->fsx_cowextsize = ip->i_d.di_cowextsize << ip->i_mount->m_sb.sb_blocklog; @@ -1126,19 +1074,33 @@ xfs_fill_fsxattr( } STATIC int -xfs_ioc_fsgetxattr( +xfs_ioc_fsgetxattra( xfs_inode_t *ip, - int attr, void __user *arg) { - struct fsxattr fa; + struct fileattr fa; xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_fill_fsxattr(ip, attr, &fa); + xfs_fill_fsxattr(ip, XFS_ATTR_FORK, &fa); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + return copy_fsxattr_to_user(&fa, arg); +} + +int +xfs_fileattr_get( + struct dentry *dentry, + struct fileattr *fa) +{ + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + + if (d_is_special(dentry)) + return -ENOTTY; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_fill_fsxattr(ip, XFS_DATA_FORK, fa); xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; return 0; } @@ -1205,7 +1167,7 @@ static int xfs_ioctl_setattr_xflags( struct xfs_trans *tp, struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; uint64_t di_flags2; @@ -1248,7 +1210,7 @@ xfs_ioctl_setattr_xflags( static void xfs_ioctl_setattr_prepare_dax( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -1293,24 +1255,11 @@ xfs_ioctl_setattr_get_trans( if (error) goto out_error; - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal to the file owner - * ID, except in cases where the CAP_FSETID capability is applicable. - */ - if (!inode_owner_or_capable(VFS_I(ip))) { - error = -EPERM; - goto out_cancel; - } - if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); return tp; -out_cancel: - xfs_trans_cancel(tp); out_error: return ERR_PTR(error); } @@ -1334,12 +1283,15 @@ out_error: static int xfs_ioctl_setattr_check_extsize( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; xfs_extlen_t size; xfs_fsblock_t extsize_fsb; + if (!fa->fsx_valid) + return 0; + if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) return -EINVAL; @@ -1384,12 +1336,15 @@ xfs_ioctl_setattr_check_extsize( static int xfs_ioctl_setattr_check_cowextsize( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; xfs_extlen_t size; xfs_fsblock_t cowextsize_fsb; + if (!fa->fsx_valid) + return 0; + if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) return 0; @@ -1416,8 +1371,11 @@ xfs_ioctl_setattr_check_cowextsize( static int xfs_ioctl_setattr_check_projid( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { + if (!fa->fsx_valid) + return 0; + /* Disallow 32bit project ids if projid32bit feature is not enabled. */ if (fa->fsx_projid > (uint16_t)-1 && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) @@ -1425,12 +1383,13 @@ xfs_ioctl_setattr_check_projid( return 0; } -STATIC int -xfs_ioctl_setattr( - xfs_inode_t *ip, - struct fsxattr *fa) +int +xfs_fileattr_set( + struct user_namespace *mnt_userns, + struct dentry *dentry, + struct fileattr *fa) { - struct fsxattr old_fa; + struct xfs_inode *ip = XFS_I(d_inode(dentry)); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_dquot *pdqp = NULL; @@ -1439,6 +1398,16 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); + if (d_is_special(dentry)) + return -ENOTTY; + + if (!fa->fsx_valid) { + if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | + FS_NOATIME_FL | FS_NODUMP_FL | + FS_SYNC_FL | FS_DAX_FL | FS_PROJINHERIT_FL)) + return -EOPNOTSUPP; + } + error = xfs_ioctl_setattr_check_projid(ip, fa); if (error) return error; @@ -1451,7 +1420,7 @@ xfs_ioctl_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp)) { + if (fa->fsx_valid && XFS_IS_QUOTA_ON(mp)) { error = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid, VFS_I(ip)->i_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp); @@ -1467,11 +1436,6 @@ xfs_ioctl_setattr( goto error_free_dquots; } - xfs_fill_fsxattr(ip, false, &old_fa); - error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa); - if (error) - goto error_trans_cancel; - error = xfs_ioctl_setattr_check_extsize(ip, fa); if (error) goto error_trans_cancel; @@ -1484,6 +1448,8 @@ xfs_ioctl_setattr( if (error) goto error_trans_cancel; + if (!fa->fsx_valid) + goto skip_xattr; /* * Change file ownership. Must be the owner or privileged. CAP_FSETID * overrides the following restrictions: @@ -1493,7 +1459,7 @@ xfs_ioctl_setattr( */ if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID)) VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ @@ -1521,6 +1487,7 @@ xfs_ioctl_setattr( else ip->i_d.di_cowextsize = 0; +skip_xattr: error = xfs_trans_commit(tp); /* @@ -1538,92 +1505,6 @@ error_free_dquots: return error; } -STATIC int -xfs_ioc_fssetxattr( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - int error; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - error = mnt_want_write_file(filp); - if (error) - return error; - error = xfs_ioctl_setattr(ip, &fa); - mnt_drop_write_file(filp); - return error; -} - -STATIC int -xfs_ioc_getxflags( - xfs_inode_t *ip, - void __user *arg) -{ - unsigned int flags; - - flags = xfs_di2lxflags(ip->i_d.di_flags, ip->i_d.di_flags2); - if (copy_to_user(arg, &flags, sizeof(flags))) - return -EFAULT; - return 0; -} - -STATIC int -xfs_ioc_setxflags( - struct xfs_inode *ip, - struct file *filp, - void __user *arg) -{ - struct xfs_trans *tp; - struct fsxattr fa; - struct fsxattr old_fa; - unsigned int flags; - int error; - - if (copy_from_user(&flags, arg, sizeof(flags))) - return -EFAULT; - - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ - FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL | FS_DAX_FL)) - return -EOPNOTSUPP; - - fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - - error = mnt_want_write_file(filp); - if (error) - return error; - - xfs_ioctl_setattr_prepare_dax(ip, &fa); - - tp = xfs_ioctl_setattr_get_trans(ip, NULL); - if (IS_ERR(tp)) { - error = PTR_ERR(tp); - goto out_drop_write; - } - - xfs_fill_fsxattr(ip, false, &old_fa); - error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa); - if (error) { - xfs_trans_cancel(tp); - goto out_drop_write; - } - - error = xfs_ioctl_setattr_xflags(tp, ip, &fa); - if (error) { - xfs_trans_cancel(tp); - goto out_drop_write; - } - - error = xfs_trans_commit(tp); -out_drop_write: - mnt_drop_write_file(filp); - return error; -} - static bool xfs_getbmap_format( struct kgetbmap *p, @@ -2110,10 +1991,10 @@ xfs_file_ioctl( case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: - return xfs_ioc_fsbulkstat(mp, cmd, arg); + return xfs_ioc_fsbulkstat(filp, cmd, arg); case XFS_IOC_BULKSTAT: - return xfs_ioc_bulkstat(mp, cmd, arg); + return xfs_ioc_bulkstat(filp, cmd, arg); case XFS_IOC_INUMBERS: return xfs_ioc_inumbers(mp, cmd, arg); @@ -2130,16 +2011,8 @@ xfs_file_ioctl( case XFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); - case XFS_IOC_FSGETXATTR: - return xfs_ioc_fsgetxattr(ip, 0, arg); case XFS_IOC_FSGETXATTRA: - return xfs_ioc_fsgetxattr(ip, 1, arg); - case XFS_IOC_FSSETXATTR: - return xfs_ioc_fssetxattr(ip, filp, arg); - case XFS_IOC_GETXFLAGS: - return xfs_ioc_getxflags(ip, arg); - case XFS_IOC_SETXFLAGS: - return xfs_ioc_setxflags(ip, filp, arg); + return xfs_ioc_fsgetxattra(ip, arg); case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index bab6a5a92407..28453a6d4461 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -47,6 +47,17 @@ xfs_handle_to_dentry( void __user *uhandle, u32 hlen); +extern int +xfs_fileattr_get( + struct dentry *dentry, + struct fileattr *fa); + +extern int +xfs_fileattr_set( + struct user_namespace *mnt_userns, + struct dentry *dentry, + struct fileattr *fa); + extern long xfs_file_ioctl( struct file *filp, diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c1771e728117..e6506773ba55 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -209,14 +209,16 @@ xfs_fsbulkstat_one_fmt_compat( /* copied from xfs_ioctl.c */ STATIC int xfs_compat_ioc_fsbulkstat( - xfs_mount_t *mp, + struct file *file, unsigned int cmd, struct compat_xfs_fsop_bulkreq __user *p32) { + struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; u32 addr; struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, + .mnt_userns = file_mnt_user_ns(file), .ocount = 0, }; xfs_ino_t lastino; @@ -436,7 +438,6 @@ xfs_file_compat_ioctl( { struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; void __user *arg = compat_ptr(p); int error; @@ -456,7 +457,7 @@ xfs_file_compat_ioctl( return xfs_ioc_space(filp, &bf); } case XFS_IOC_FSGEOMETRY_V1_32: - return xfs_compat_ioc_fsgeometry_v1(mp, arg); + return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg); case XFS_IOC_FSGROWFSDATA_32: { struct xfs_growfs_data in; @@ -465,7 +466,7 @@ xfs_file_compat_ioctl( error = mnt_want_write_file(filp); if (error) return error; - error = xfs_growfs_data(mp, &in); + error = xfs_growfs_data(ip->i_mount, &in); mnt_drop_write_file(filp); return error; } @@ -477,14 +478,12 @@ xfs_file_compat_ioctl( error = mnt_want_write_file(filp); if (error) return error; - error = xfs_growfs_rt(mp, &in); + error = xfs_growfs_rt(ip->i_mount, &in); mnt_drop_write_file(filp); return error; } #endif /* long changes size, but xfs only copiese out 32 bits */ - case XFS_IOC_GETXFLAGS_32: - case XFS_IOC_SETXFLAGS_32: case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); return xfs_file_ioctl(filp, cmd, p); @@ -507,7 +506,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - return xfs_compat_ioc_fsbulkstat(mp, cmd, arg); + return xfs_compat_ioc_fsbulkstat(filp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: case XFS_IOC_PATH_TO_FSHANDLE_32: { diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 053de7d894cd..9929482bf358 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -17,8 +17,6 @@ */ /* stock kernel-level ioctls we support */ -#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS -#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS #define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION /* diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 00369502fe25..e1f749b711f8 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -21,6 +21,7 @@ #include "xfs_dir2.h" #include "xfs_iomap.h" #include "xfs_error.h" +#include "xfs_ioctl.h" #include <linux/posix_acl.h> #include <linux/security.h> @@ -128,6 +129,7 @@ xfs_cleanup_inode( STATIC int xfs_generic_create( + struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, @@ -161,9 +163,10 @@ xfs_generic_create( goto out_free_acl; if (!tmpfile) { - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev, + &ip); } else { - error = xfs_create_tmpfile(XFS_I(dir), mode, &ip); + error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip); } if (unlikely(error)) goto out_free_acl; @@ -220,31 +223,35 @@ xfs_generic_create( STATIC int xfs_vn_mknod( - struct inode *dir, - struct dentry *dentry, - umode_t mode, - dev_t rdev) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) { - return xfs_generic_create(dir, dentry, mode, rdev, false); + return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, false); } STATIC int xfs_vn_create( - struct inode *dir, - struct dentry *dentry, - umode_t mode, - bool flags) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + bool flags) { - return xfs_generic_create(dir, dentry, mode, 0, false); + return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, false); } STATIC int xfs_vn_mkdir( - struct inode *dir, - struct dentry *dentry, - umode_t mode) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { - return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false); + return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0, + false); } STATIC struct dentry * @@ -361,9 +368,10 @@ xfs_vn_unlink( STATIC int xfs_vn_symlink( - struct inode *dir, - struct dentry *dentry, - const char *symname) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + const char *symname) { struct inode *inode; struct xfs_inode *cip = NULL; @@ -377,7 +385,7 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); + error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; @@ -403,11 +411,12 @@ xfs_vn_symlink( STATIC int xfs_vn_rename( - struct inode *odir, - struct dentry *odentry, - struct inode *ndir, - struct dentry *ndentry, - unsigned int flags) + struct user_namespace *mnt_userns, + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry, + unsigned int flags) { struct inode *new_inode = d_inode(ndentry); int omode = 0; @@ -431,8 +440,8 @@ xfs_vn_rename( if (unlikely(error)) return error; - return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), - XFS_I(ndir), &nname, + return xfs_rename(mnt_userns, XFS_I(odir), &oname, + XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL, flags); } @@ -529,6 +538,7 @@ xfs_stat_blksize( STATIC int xfs_vn_getattr( + struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, @@ -547,8 +557,8 @@ xfs_vn_getattr( stat->dev = inode->i_sb->s_dev; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; + stat->uid = i_uid_into_mnt(mnt_userns, inode); + stat->gid = i_gid_into_mnt(mnt_userns, inode); stat->ino = ip->i_ino; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; @@ -626,8 +636,9 @@ xfs_setattr_time( static int xfs_vn_change_ok( - struct dentry *dentry, - struct iattr *iattr) + struct user_namespace *mnt_userns, + struct dentry *dentry, + struct iattr *iattr) { struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount; @@ -637,7 +648,7 @@ xfs_vn_change_ok( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - return setattr_prepare(dentry, iattr); + return setattr_prepare(mnt_userns, dentry, iattr); } /* @@ -648,6 +659,7 @@ xfs_vn_change_ok( */ static int xfs_setattr_nonsize( + struct user_namespace *mnt_userns, struct xfs_inode *ip, struct iattr *iattr) { @@ -788,7 +800,7 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if (mask & ATTR_MODE) { - error = posix_acl_chmod(inode, inode->i_mode); + error = posix_acl_chmod(mnt_userns, inode, inode->i_mode); if (error) return error; } @@ -809,6 +821,7 @@ out_dqrele: */ STATIC int xfs_setattr_size( + struct user_namespace *mnt_userns, struct xfs_inode *ip, struct iattr *iattr) { @@ -840,7 +853,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr); + return xfs_setattr_nonsize(mnt_userns, ip, iattr); } /* @@ -1009,6 +1022,7 @@ out_trans_cancel: int xfs_vn_setattr_size( + struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { @@ -1017,14 +1031,15 @@ xfs_vn_setattr_size( trace_xfs_setattr(ip); - error = xfs_vn_change_ok(dentry, iattr); + error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (error) return error; - return xfs_setattr_size(ip, iattr); + return xfs_setattr_size(mnt_userns, ip, iattr); } STATIC int xfs_vn_setattr( + struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { @@ -1044,14 +1059,14 @@ xfs_vn_setattr( return error; } - error = xfs_vn_setattr_size(dentry, iattr); + error = xfs_vn_setattr_size(mnt_userns, dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { trace_xfs_setattr(ip); - error = xfs_vn_change_ok(dentry, iattr); + error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (!error) - error = xfs_setattr_nonsize(ip, iattr); + error = xfs_setattr_nonsize(mnt_userns, ip, iattr); } return error; @@ -1122,11 +1137,12 @@ xfs_vn_fiemap( STATIC int xfs_vn_tmpfile( - struct inode *dir, - struct dentry *dentry, - umode_t mode) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { - return xfs_generic_create(dir, dentry, mode, 0, true); + return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, true); } static const struct inode_operations xfs_inode_operations = { @@ -1137,6 +1153,8 @@ static const struct inode_operations xfs_inode_operations = { .listxattr = xfs_vn_listxattr, .fiemap = xfs_vn_fiemap, .update_time = xfs_vn_update_time, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_dir_inode_operations = { @@ -1162,6 +1180,8 @@ static const struct inode_operations xfs_dir_inode_operations = { .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, .tmpfile = xfs_vn_tmpfile, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1187,6 +1207,8 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, .tmpfile = xfs_vn_tmpfile, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_symlink_inode_operations = { diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 99ca745c1071..278949056048 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -14,6 +14,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); -extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap); +int xfs_vn_setattr_size(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *vap); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 16ca97a7ff00..3498b97fb06d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -54,10 +54,12 @@ struct xfs_bstat_chunk { STATIC int xfs_bulkstat_one_int( struct xfs_mount *mp, + struct user_namespace *mnt_userns, struct xfs_trans *tp, xfs_ino_t ino, struct xfs_bstat_chunk *bc) { + struct user_namespace *sb_userns = mp->m_super->s_user_ns; struct xfs_icdinode *dic; /* dinode core info pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct inode *inode; @@ -86,8 +88,8 @@ xfs_bulkstat_one_int( */ buf->bs_projectid = ip->i_d.di_projid; buf->bs_ino = ino; - buf->bs_uid = i_uid_read(inode); - buf->bs_gid = i_gid_read(inode); + buf->bs_uid = from_kuid(sb_userns, i_uid_into_mnt(mnt_userns, inode)); + buf->bs_gid = from_kgid(sb_userns, i_gid_into_mnt(mnt_userns, inode)); buf->bs_size = dic->di_size; buf->bs_nlink = inode->i_nlink; @@ -166,6 +168,12 @@ xfs_bulkstat_one( }; int error; + if (breq->mnt_userns != &init_user_ns) { + xfs_warn_ratelimited(breq->mp, + "bulkstat not supported inside of idmapped mounts."); + return -EINVAL; + } + ASSERT(breq->icount == 1); bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), @@ -173,7 +181,8 @@ xfs_bulkstat_one( if (!bc.buf) return -ENOMEM; - error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); + error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, NULL, + breq->startino, &bc); kmem_free(bc.buf); @@ -194,9 +203,10 @@ xfs_bulkstat_iwalk( xfs_ino_t ino, void *data) { + struct xfs_bstat_chunk *bc = data; int error; - error = xfs_bulkstat_one_int(mp, tp, ino, data); + error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data); /* bulkstat just skips over missing inodes */ if (error == -ENOENT || error == -EINVAL) return 0; @@ -239,6 +249,11 @@ xfs_bulkstat( }; int error; + if (breq->mnt_userns != &init_user_ns) { + xfs_warn_ratelimited(breq->mp, + "bulkstat not supported inside of idmapped mounts."); + return -EINVAL; + } if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 96a1e2a9be3f..7078d10c9b12 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -8,6 +8,7 @@ /* In-memory representation of a userspace request for batch inode data. */ struct xfs_ibulk { struct xfs_mount *mp; + struct user_namespace *mnt_userns; void __user *ubuffer; /* user output buffer */ xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 52370d0a3f43..1c97b155a8ee 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -635,6 +635,47 @@ xfs_check_summary_counts( } /* + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and + * internal inode structures can be sitting in the CIL and AIL at this point, + * so we need to unpin them, write them back and/or reclaim them before unmount + * can proceed. + * + * An inode cluster that has been freed can have its buffer still pinned in + * memory because the transaction is still sitting in a iclog. The stale inodes + * on that buffer will be pinned to the buffer until the transaction hits the + * disk and the callbacks run. Pushing the AIL will skip the stale inodes and + * may never see the pinned buffer, so nothing will push out the iclog and + * unpin the buffer. + * + * Hence we need to force the log to unpin everything first. However, log + * forces don't wait for the discards they issue to complete, so we have to + * explicitly wait for them to complete here as well. + * + * Then we can tell the world we are unmounting so that error handling knows + * that the filesystem is going away and we should error out anything that we + * have been retrying in the background. This will prevent never-ending + * retries in AIL pushing from hanging the unmount. + * + * Finally, we can push the AIL to clean all the remaining dirty objects, then + * reclaim the remaining inodes that are still in memory at this point in time. + */ +static void +xfs_unmount_flush_inodes( + struct xfs_mount *mp) +{ + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_extent_busy_wait_all(mp); + flush_workqueue(xfs_discard_wq); + + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + xfs_ail_push_all_sync(mp->m_ail); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp); + xfs_health_unmount(mp); +} + +/* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct * - if we're a 32-bit kernel, do a size check on the superblock @@ -1008,7 +1049,7 @@ xfs_mountfs( /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); /* - * Cancel all delayed reclaim work and reclaim the inodes directly. + * Flush all inode reclamation work and flush the log. * We have to do this /after/ rtunmount and qm_unmount because those * two will have scheduled delayed reclaim for the rt/quota inodes. * @@ -1018,11 +1059,8 @@ xfs_mountfs( * qm_unmount_quotas and therefore rely on qm_unmount to release the * quota inodes. */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); out_log_dealloc: - mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1063,47 +1101,7 @@ xfs_unmountfs( xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); - /* - * We can potentially deadlock here if we have an inode cluster - * that has been freed has its buffer still pinned in memory because - * the transaction is still sitting in a iclog. The stale inodes - * on that buffer will be pinned to the buffer until the - * transaction hits the disk and the callbacks run. Pushing the AIL will - * skip the stale inodes and may never see the pinned buffer, so - * nothing will push out the iclog and unpin the buffer. Hence we - * need to force the log here to ensure all items are flushed into the - * AIL before we go any further. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Wait for all busy extents to be freed, including completion of - * any discard operation. - */ - xfs_extent_busy_wait_all(mp); - flush_workqueue(xfs_discard_wq); - - /* - * We now need to tell the world we are unmounting. This will allow - * us to detect that the filesystem is going away and we should error - * out anything that we have been retrying in the background. This will - * prevent neverending retries in AIL pushing from hanging the unmount. - */ - mp->m_flags |= XFS_MOUNT_UNMOUNTING; - - /* - * Flush all pending changes from the AIL. - */ - xfs_ail_push_all_sync(mp->m_ail); - - /* - * Reclaim all inodes. At this point there should be no dirty inodes and - * none should be pinned or locked. Stop background inode reclaim here - * if it is still running. - */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); xfs_qm_unmount(mp); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 742d1413e2d0..bfa4164990b1 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -787,7 +787,8 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp); + error = xfs_dir_ialloc(&init_user_ns, &tp, NULL, S_IFREG, 1, 0, + 0, ipp); if (error) { xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 07ebccbbf4df..746f4eda724c 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -269,8 +269,8 @@ xfs_trans_log_finish_refcount_update( static int xfs_refcount_update_diff_items( void *priv, - struct list_head *a, - struct list_head *b) + const struct list_head *a, + const struct list_head *b) { struct xfs_mount *mp = priv; struct xfs_refcount_intent *ra; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 49cebd68b672..dc4f0c9f0897 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -337,8 +337,8 @@ xfs_trans_log_finish_rmap_update( static int xfs_rmap_update_diff_items( void *priv, - struct list_head *a, - struct list_head *b) + const struct list_head *a, + const struct list_head *b) { struct xfs_mount *mp = priv; struct xfs_rmap_intent *ra; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 586d42342a79..e5e0713bebcd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1881,7 +1881,7 @@ static struct file_system_type xfs_fs_type = { .init_fs_context = xfs_init_fs_context, .parameters = xfs_fs_parameters, .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("xfs"); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8565663b16cd..63edb4dbed4a 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -134,6 +134,7 @@ xfs_readlink( int xfs_symlink( + struct user_namespace *mnt_userns, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, @@ -181,7 +182,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), + mapped_fsgid(mnt_userns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -221,8 +223,8 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, - prid, &ip); + error = xfs_dir_ialloc(mnt_userns, &tp, dp, S_IFLNK | (mode & ~S_IFMT), + 1, 0, prid, &ip); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index b1fa091427e6..2586b7e393f3 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -7,8 +7,9 @@ /* Kernel only symlink definitions */ -int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, - const char *target_path, umode_t mode, struct xfs_inode **ipp); +int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp, + struct xfs_name *link_name, const char *target_path, + umode_t mode, struct xfs_inode **ipp); int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link); int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_inactive_symlink(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 145e06c47744..546a6cd96729 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler( #endif /* CONFIG_PROC_FS */ STATIC int -xfs_deprecate_irix_sgid_inherit_proc_handler( +xfs_deprecated_dointvec_minmax( struct ctl_table *ctl, int write, void *buffer, @@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler( loff_t *ppos) { if (write) { - printk_once(KERN_WARNING - "XFS: " "%s sysctl option is deprecated.\n", - ctl->procname); - } - return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); -} - -STATIC int -xfs_deprecate_irix_symlink_mode_proc_handler( - struct ctl_table *ctl, - int write, - void *buffer, - size_t *lenp, - loff_t *ppos) -{ - if (write) { - printk_once(KERN_WARNING - "XFS: " "%s sysctl option is deprecated.\n", + printk_ratelimited(KERN_WARNING + "XFS: %s sysctl option is deprecated.\n", ctl->procname); } return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); @@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = { .data = &xfs_params.sgid_inherit.val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler, + .proc_handler = xfs_deprecated_dointvec_minmax, .extra1 = &xfs_params.sgid_inherit.min, .extra2 = &xfs_params.sgid_inherit.max }, @@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = { .data = &xfs_params.symlink_mode.val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler, + .proc_handler = xfs_deprecated_dointvec_minmax, .extra1 = &xfs_params.symlink_mode.min, .extra2 = &xfs_params.symlink_mode.max }, @@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = { .extra1 = &xfs_params.blockgc_timer.min, .extra2 = &xfs_params.blockgc_timer.max, }, + { + .procname = "speculative_cow_prealloc_lifetime", + .data = &xfs_params.blockgc_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = xfs_deprecated_dointvec_minmax, + .extra1 = &xfs_params.blockgc_timer.min, + .extra2 = &xfs_params.blockgc_timer.max, + }, /* please keep this the last entry */ #ifdef CONFIG_PROC_FS { diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 44f72c09c203..b22a09e9daee 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -72,6 +72,7 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); + xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); @@ -123,7 +124,8 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - ntp->t_pflags = tp->t_pflags; + + xfs_trans_switch_context(tp, ntp); /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -157,9 +159,6 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - /* * Attempt to reserve the needed disk blocks by decrementing * the number needed from the number available. This will @@ -167,10 +166,8 @@ xfs_trans_reserve( */ if (blocks > 0) { error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); - if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + if (error != 0) return -ENOSPC; - } tp->t_blk_res += blocks; } @@ -244,9 +241,6 @@ undo_blocks: xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } - - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - return error; } @@ -260,6 +254,7 @@ xfs_trans_alloc( struct xfs_trans **tpp) { struct xfs_trans *tp; + bool want_retry = true; int error; /* @@ -267,9 +262,11 @@ xfs_trans_alloc( * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ +retry: tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp); /* * Zero-reservation ("empty") transactions can't modify anything, so @@ -289,7 +286,9 @@ xfs_trans_alloc( tp->t_firstblock = NULLFSBLOCK; error = xfs_trans_reserve(tp, resp, blocks, rtextents); - if (error == -ENOSPC) { + if (error == -ENOSPC && want_retry) { + xfs_trans_cancel(tp); + /* * We weren't able to reserve enough space for the transaction. * Flush the other speculative space allocations to free space. @@ -297,8 +296,11 @@ xfs_trans_alloc( * other locks. */ error = xfs_blockgc_free_space(mp, NULL); - if (!error) - error = xfs_trans_reserve(tp, resp, blocks, rtextents); + if (error) + return error; + + want_retry = false; + goto retry; } if (error) { xfs_trans_cancel(tp); @@ -893,7 +895,6 @@ __xfs_trans_commit( xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp); /* @@ -925,7 +926,6 @@ out_unreserve: xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); @@ -985,9 +985,6 @@ xfs_trans_cancel( tp->t_ticket = NULL; } - /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 8b03fbfe9a1b..9dd745cf77c9 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_trans **tpp); +static inline void +xfs_trans_set_context( + struct xfs_trans *tp) +{ + ASSERT(current->journal_info == NULL); + tp->t_pflags = memalloc_nofs_save(); + current->journal_info = tp; +} + +static inline void +xfs_trans_clear_context( + struct xfs_trans *tp) +{ + if (current->journal_info == tp) { + memalloc_nofs_restore(tp->t_pflags); + current->journal_info = NULL; + } +} + +static inline void +xfs_trans_switch_context( + struct xfs_trans *old_tp, + struct xfs_trans *new_tp) +{ + ASSERT(current->journal_info == old_tp); + new_tp->t_pflags = old_tp->t_pflags; + old_tp->t_pflags = 0; + current->journal_info = new_tp; +} + #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index bca48b308c02..12be32f66dc1 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -38,9 +38,10 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, } static int -xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, - struct inode *inode, const char *name, const void *value, - size_t size, int flags) +xfs_xattr_set(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *unused, + struct inode *inode, const char *name, const void *value, + size_t size, int flags) { struct xfs_da_args args = { .dp = XFS_I(inode), |