diff options
-rw-r--r-- | fs/xfs/Kconfig | 12 | ||||
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 43 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.c | 661 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_dir2.h | 49 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_format.h | 9 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 15 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_util.c | 749 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_util.h | 62 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ondisk.h | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_shared.h | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/common.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/tempfile.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 32 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot_item.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 1487 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 70 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 32 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 60 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 51 | ||||
-rw-r--r-- | fs/xfs/xfs_linux.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 70 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.h | 1 |
28 files changed, 1948 insertions, 1545 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index d41edd30388b..fffd6fffdce0 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -217,6 +217,18 @@ config XFS_DEBUG Say N unless you are an XFS developer, or you play one on TV. +config XFS_DEBUG_EXPENSIVE + bool "XFS expensive debugging checks" + depends on XFS_FS && XFS_DEBUG + help + Say Y here to get an XFS build with expensive debugging checks + enabled. These checks may affect performance significantly. + + Note that the resulting code will be HUGER and SLOWER, and probably + not useful unless you are debugging a particular problem. + + Say N unless you are an XFS developer, or you play one on TV. + config XFS_ASSERT_FATAL bool "XFS fatal asserts" default y diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c50447548d65..dd692619bed5 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -40,6 +40,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_iext_tree.o \ xfs_inode_fork.o \ xfs_inode_buf.o \ + xfs_inode_util.o \ xfs_log_rlimit.o \ xfs_ag_resv.o \ xfs_parent.o \ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 6af6f744fdd6..09e3302a4b72 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -39,6 +39,7 @@ #include "xfs_health.h" #include "xfs_bmap_item.h" #include "xfs_symlink_remote.h" +#include "xfs_inode_util.h" struct kmem_cache *xfs_bmap_intent_cache; @@ -6454,3 +6455,45 @@ xfs_bmap_query_all( return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query); } + +/* Helper function to extract extent size hint from inode */ +xfs_extlen_t +xfs_get_extsz_hint( + struct xfs_inode *ip) +{ + /* + * No point in aligning allocations if we need to COW to actually + * write to them. + */ + if (xfs_is_always_cow_inode(ip)) + return 0; + if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) + return ip->i_extsize; + if (XFS_IS_REALTIME_INODE(ip) && + ip->i_mount->m_sb.sb_rextsize > 1) + return ip->i_mount->m_sb.sb_rextsize; + return 0; +} + +/* + * Helper function to extract CoW extent size hint from inode. + * Between the extent size hint and the CoW extent size hint, we + * return the greater of the two. If the value is zero (automatic), + * use the default size. + */ +xfs_extlen_t +xfs_get_cowextsz_hint( + struct xfs_inode *ip) +{ + xfs_extlen_t a, b; + + a = 0; + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) + a = ip->i_cowextsize; + b = xfs_get_extsz_hint(ip); + + a = max(a, b); + if (a == 0) + return XFS_DEFAULT_COWEXTSZ_HINT; + return a; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 667b0c2b33d1..7592d46e97c6 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -296,4 +296,7 @@ typedef int (*xfs_bmap_query_range_fn)( int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn, void *priv); +xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); +xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 457f9a38f850..202468223bf9 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -19,6 +19,11 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_health.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_parent.h" +#include "xfs_ag.h" +#include "xfs_ialloc.h" const struct xfs_name xfs_name_dotdot = { .name = (const unsigned char *)"..", @@ -584,9 +589,9 @@ xfs_dir_replace( */ int xfs_dir_canenter( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name) /* name of entry to add */ + struct xfs_trans *tp, + struct xfs_inode *dp, + const struct xfs_name *name) /* name of entry to add */ { return xfs_dir_createname(tp, dp, name, 0, 0); } @@ -756,3 +761,653 @@ xfs_dir2_compname( return xfs_ascii_ci_compname(args, name, len); return xfs_da_compname(args, name, len); } + +#ifdef CONFIG_XFS_LIVE_HOOKS +/* + * Use a static key here to reduce the overhead of directory live update hooks. + * If the compiler supports jump labels, the static branch will be replaced by + * a nop sled when there are no hook users. Online fsck is currently the only + * caller, so this is a reasonable tradeoff. + * + * Note: Patching the kernel code requires taking the cpu hotplug lock. Other + * parts of the kernel allocate memory with that lock held, which means that + * XFS callers cannot hold any locks that might be used by memory reclaim or + * writeback when calling the static_branch_{inc,dec} functions. + */ +DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); + +void +xfs_dir_hook_disable(void) +{ + xfs_hooks_switch_off(&xfs_dir_hooks_switch); +} + +void +xfs_dir_hook_enable(void) +{ + xfs_hooks_switch_on(&xfs_dir_hooks_switch); +} + +/* Call hooks for a directory update relating to a child dirent update. */ +inline void +xfs_dir_update_hook( + struct xfs_inode *dp, + struct xfs_inode *ip, + int delta, + const struct xfs_name *name) +{ + if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { + struct xfs_dir_update_params p = { + .dp = dp, + .ip = ip, + .delta = delta, + .name = name, + }; + struct xfs_mount *mp = ip->i_mount; + + xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); + } +} + +/* Call the specified function during a directory update. */ +int +xfs_dir_hook_add( + struct xfs_mount *mp, + struct xfs_dir_hook *hook) +{ + return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); +} + +/* Stop calling the specified function during a directory update. */ +void +xfs_dir_hook_del( + struct xfs_mount *mp, + struct xfs_dir_hook *hook) +{ + xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); +} + +/* Configure directory update hook functions. */ +void +xfs_dir_hook_setup( + struct xfs_dir_hook *hook, + notifier_fn_t mod_fn) +{ + xfs_hook_setup(&hook->dirent_hook, mod_fn); +} +#endif /* CONFIG_XFS_LIVE_HOOKS */ + +/* + * Given a directory @dp, a newly allocated inode @ip, and a @name, link @ip + * into @dp under the given @name. If @ip is a directory, it will be + * initialized. Both inodes must have the ILOCK held and the transaction must + * have sufficient blocks reserved. + */ +int +xfs_dir_create_child( + struct xfs_trans *tp, + unsigned int resblks, + struct xfs_dir_update *du) +{ + struct xfs_inode *dp = du->dp; + const struct xfs_name *name = du->name; + struct xfs_inode *ip = du->ip; + int error; + + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); + + error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks); + if (error) { + ASSERT(error != -ENOSPC); + return error; + } + + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + + if (S_ISDIR(VFS_I(ip)->i_mode)) { + error = xfs_dir_init(tp, ip, dp); + if (error) + return error; + + xfs_bumplink(tp, dp); + } + + /* + * If we have parent pointers, we need to add the attribute containing + * the parent information now. + */ + if (du->ppargs) { + error = xfs_parent_addname(tp, du->ppargs, dp, name, ip); + if (error) + return error; + } + + xfs_dir_update_hook(dp, ip, 1, name); + return 0; +} + +/* + * Given a directory @dp, an existing non-directory inode @ip, and a @name, + * link @ip into @dp under the given @name. Both inodes must have the ILOCK + * held. + */ +int +xfs_dir_add_child( + struct xfs_trans *tp, + unsigned int resblks, + struct xfs_dir_update *du) +{ + struct xfs_inode *dp = du->dp; + const struct xfs_name *name = du->name; + struct xfs_inode *ip = du->ip; + struct xfs_mount *mp = tp->t_mountp; + int error; + + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); + ASSERT(!S_ISDIR(VFS_I(ip)->i_mode)); + + if (!resblks) { + error = xfs_dir_canenter(tp, dp, name); + if (error) + return error; + } + + /* + * Handle initial link state of O_TMPFILE inode + */ + if (VFS_I(ip)->i_nlink == 0) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + error = xfs_iunlink_remove(tp, pag, ip); + xfs_perag_put(pag); + if (error) + return error; + } + + error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks); + if (error) + return error; + + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + + xfs_bumplink(tp, ip); + + /* + * If we have parent pointers, we now need to add the parent record to + * the attribute fork of the inode. If this is the initial parent + * attribute, we need to create it correctly, otherwise we can just add + * the parent to the inode. + */ + if (du->ppargs) { + error = xfs_parent_addname(tp, du->ppargs, dp, name, ip); + if (error) + return error; + } + + xfs_dir_update_hook(dp, ip, 1, name); + return 0; +} + +/* + * Given a directory @dp, a child @ip, and a @name, remove the (@name, @ip) + * entry from the directory. Both inodes must have the ILOCK held. + */ +int +xfs_dir_remove_child( + struct xfs_trans *tp, + unsigned int resblks, + struct xfs_dir_update *du) +{ + struct xfs_inode *dp = du->dp; + const struct xfs_name *name = du->name; + struct xfs_inode *ip = du->ip; + int error; + + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); + + /* + * If we're removing a directory perform some additional validation. + */ + if (S_ISDIR(VFS_I(ip)->i_mode)) { + ASSERT(VFS_I(ip)->i_nlink >= 2); + if (VFS_I(ip)->i_nlink != 2) + return -ENOTEMPTY; + if (!xfs_dir_isempty(ip)) + return -ENOTEMPTY; + + /* Drop the link from ip's "..". */ + error = xfs_droplink(tp, dp); + if (error) + return error; + + /* Drop the "." link from ip to self. */ + error = xfs_droplink(tp, ip); + if (error) + return error; + + /* + * Point the unlinked child directory's ".." entry to the root + * directory to eliminate back-references to inodes that may + * get freed before the child directory is closed. If the fs + * gets shrunk, this can lead to dirent inode validation errors. + */ + if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, + tp->t_mountp->m_sb.sb_rootino, 0); + if (error) + return error; + } + } else { + /* + * When removing a non-directory we need to log the parent + * inode here. For a directory this is done implicitly + * by the xfs_droplink call for the ".." entry. + */ + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + } + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + /* Drop the link from dp to ip. */ + error = xfs_droplink(tp, ip); + if (error) + return error; + + error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); + if (error) { + ASSERT(error != -ENOENT); + return error; + } + + /* Remove parent pointer. */ + if (du->ppargs) { + error = xfs_parent_removename(tp, du->ppargs, dp, name, ip); + if (error) + return error; + } + + xfs_dir_update_hook(dp, ip, -1, name); + return 0; +} + +/* + * Exchange the entry (@name1, @ip1) in directory @dp1 with the entry (@name2, + * @ip2) in directory @dp2, and update '..' @ip1 and @ip2's entries as needed. + * @ip1 and @ip2 need not be of the same type. + * + * All inodes must have the ILOCK held, and both entries must already exist. + */ +int +xfs_dir_exchange_children( + struct xfs_trans *tp, + struct xfs_dir_update *du1, + struct xfs_dir_update *du2, + unsigned int spaceres) +{ + struct xfs_inode *dp1 = du1->dp; + const struct xfs_name *name1 = du1->name; + struct xfs_inode *ip1 = du1->ip; + struct xfs_inode *dp2 = du2->dp; + const struct xfs_name *name2 = du2->name; + struct xfs_inode *ip2 = du2->ip; + int ip1_flags = 0; + int ip2_flags = 0; + int dp2_flags = 0; + int error; + + /* Swap inode number for dirent in first parent */ + error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); + if (error) + return error; + + /* Swap inode number for dirent in second parent */ + error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); + if (error) + return error; + + /* + * If we're renaming one or more directories across different parents, + * update the respective ".." entries (and link counts) to match the new + * parents. + */ + if (dp1 != dp2) { + dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + + if (S_ISDIR(VFS_I(ip2)->i_mode)) { + error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, + dp1->i_ino, spaceres); + if (error) + return error; + + /* transfer ip2 ".." reference to dp1 */ + if (!S_ISDIR(VFS_I(ip1)->i_mode)) { + error = xfs_droplink(tp, dp2); + if (error) + return error; + xfs_bumplink(tp, dp1); + } + + /* + * Although ip1 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + } + + if (S_ISDIR(VFS_I(ip1)->i_mode)) { + error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, + dp2->i_ino, spaceres); + if (error) + return error; + + /* transfer ip1 ".." reference to dp2 */ + if (!S_ISDIR(VFS_I(ip2)->i_mode)) { + error = xfs_droplink(tp, dp1); + if (error) + return error; + xfs_bumplink(tp, dp2); + } + + /* + * Although ip2 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_CHG; + } + } + + if (ip1_flags) { + xfs_trans_ichgtime(tp, ip1, ip1_flags); + xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); + } + if (ip2_flags) { + xfs_trans_ichgtime(tp, ip2, ip2_flags); + xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); + } + if (dp2_flags) { + xfs_trans_ichgtime(tp, dp2, dp2_flags); + xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); + } + xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); + + /* Schedule parent pointer replacements */ + if (du1->ppargs) { + error = xfs_parent_replacename(tp, du1->ppargs, dp1, name1, + dp2, name2, ip1); + if (error) + return error; + } + + if (du2->ppargs) { + error = xfs_parent_replacename(tp, du2->ppargs, dp2, name2, + dp1, name1, ip2); + if (error) + return error; + } + + /* + * Inform our hook clients that we've finished an exchange operation as + * follows: removed the source and target files from their directories; + * added the target to the source directory; and added the source to + * the target directory. All inodes are locked, so it's ok to model a + * rename this way so long as we say we deleted entries before we add + * new ones. + */ + xfs_dir_update_hook(dp1, ip1, -1, name1); + xfs_dir_update_hook(dp2, ip2, -1, name2); + xfs_dir_update_hook(dp1, ip2, 1, name1); + xfs_dir_update_hook(dp2, ip1, 1, name2); + return 0; +} + +/* + * Given an entry (@src_name, @src_ip) in directory @src_dp, make the entry + * @target_name in directory @target_dp point to @src_ip and remove the + * original entry, cleaning up everything left behind. + * + * Cleanup involves dropping a link count on @target_ip, and either removing + * the (@src_name, @src_ip) entry from @src_dp or simply replacing the entry + * with (@src_name, @wip) if a whiteout inode @wip is supplied. + * + * All inodes must have the ILOCK held. We assume that if @src_ip is a + * directory then its '..' doesn't already point to @target_dp, and that @wip + * is a freshly allocated whiteout. + */ +int +xfs_dir_rename_children( + struct xfs_trans *tp, + struct xfs_dir_update *du_src, + struct xfs_dir_update *du_tgt, + unsigned int spaceres, + struct xfs_dir_update *du_wip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *src_dp = du_src->dp; + const struct xfs_name *src_name = du_src->name; + struct xfs_inode *src_ip = du_src->ip; + struct xfs_inode *target_dp = du_tgt->dp; + const struct xfs_name *target_name = du_tgt->name; + struct xfs_inode *target_ip = du_tgt->ip; + bool new_parent = (src_dp != target_dp); + bool src_is_directory; + int error; + + src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); + + /* + * Check for expected errors before we dirty the transaction + * so we can return an error without a transaction abort. + */ + if (target_ip == NULL) { + /* + * If there's no space reservation, check the entry will + * fit before actually inserting it. + */ + if (!spaceres) { + error = xfs_dir_canenter(tp, target_dp, target_name); + if (error) + return error; + } + } else { + /* + * If target exists and it's a directory, check that whether + * it can be destroyed. + */ + if (S_ISDIR(VFS_I(target_ip)->i_mode) && + (!xfs_dir_isempty(target_ip) || + (VFS_I(target_ip)->i_nlink > 2))) + return -EEXIST; + } + + /* + * Directory entry creation below may acquire the AGF. Remove + * the whiteout from the unlinked list first to preserve correct + * AGI/AGF locking order. This dirties the transaction so failures + * after this point will abort and log recovery will clean up the + * mess. + * + * For whiteouts, we need to bump the link count on the whiteout + * inode. After this point, we have a real link, clear the tmpfile + * state flag from the inode so it doesn't accidentally get misused + * in future. + */ + if (du_wip->ip) { + struct xfs_perag *pag; + + ASSERT(VFS_I(du_wip->ip)->i_nlink == 0); + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, du_wip->ip->i_ino)); + error = xfs_iunlink_remove(tp, pag, du_wip->ip); + xfs_perag_put(pag); + if (error) + return error; + + xfs_bumplink(tp, du_wip->ip); + } + + /* + * Set up the target. + */ + if (target_ip == NULL) { + /* + * If target does not exist and the rename crosses + * directories, adjust the target directory link count + * to account for the ".." reference from the new entry. + */ + error = xfs_dir_createname(tp, target_dp, target_name, + src_ip->i_ino, spaceres); + if (error) + return error; + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + if (new_parent && src_is_directory) { + xfs_bumplink(tp, target_dp); + } + } else { /* target_ip != NULL */ + /* + * Link the source inode under the target name. + * If the source inode is a directory and we are moving + * it across directories, its ".." entry will be + * inconsistent until we replace that down below. + * + * In case there is already an entry with the same + * name at the destination directory, remove it first. + */ + error = xfs_dir_replace(tp, target_dp, target_name, + src_ip->i_ino, spaceres); + if (error) + return error; + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + /* + * Decrement the link count on the target since the target + * dir no longer points to it. + */ + error = xfs_droplink(tp, target_ip); + if (error) + return error; + + if (src_is_directory) { + /* + * Drop the link from the old "." entry. + */ + error = xfs_droplink(tp, target_ip); + if (error) + return error; + } + } /* target_ip != NULL */ + + /* + * Remove the source. + */ + if (new_parent && src_is_directory) { + /* + * Rewrite the ".." entry to point to the new + * directory. + */ + error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, + target_dp->i_ino, spaceres); + ASSERT(error != -EEXIST); + if (error) + return error; + } + + /* + * We always want to hit the ctime on the source inode. + * + * This isn't strictly required by the standards since the source + * inode isn't really being changed, but old unix file systems did + * it and some incremental backup programs won't work without it. + */ + xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); + + /* + * Adjust the link count on src_dp. This is necessary when + * renaming a directory, either within one parent when + * the target existed, or across two parent directories. + */ + if (src_is_directory && (new_parent || target_ip != NULL)) { + + /* + * Decrement link count on src_directory since the + * entry that's moved no longer points to it. + */ + error = xfs_droplink(tp, src_dp); + if (error) + return error; + } + + /* + * For whiteouts, we only need to update the source dirent with the + * inode number of the whiteout inode rather than removing it + * altogether. + */ + if (du_wip->ip) + error = xfs_dir_replace(tp, src_dp, src_name, du_wip->ip->i_ino, + spaceres); + else + error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, + spaceres); + if (error) + return error; + + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); + if (new_parent) + xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); + + /* Schedule parent pointer updates. */ + if (du_wip->ppargs) { + error = xfs_parent_addname(tp, du_wip->ppargs, src_dp, + src_name, du_wip->ip); + if (error) + return error; + } + + if (du_src->ppargs) { + error = xfs_parent_replacename(tp, du_src->ppargs, src_dp, + src_name, target_dp, target_name, src_ip); + if (error) + return error; + } + + if (du_tgt->ppargs) { + error = xfs_parent_removename(tp, du_tgt->ppargs, target_dp, + target_name, target_ip); + if (error) + return error; + } + + /* + * Inform our hook clients that we've finished a rename operation as + * follows: removed the source and target files from their directories; + * that we've added the source to the target directory; and finally + * that we've added the whiteout, if there was one. All inodes are + * locked, so it's ok to model a rename this way so long as we say we + * deleted entries before we add new ones. + */ + if (target_ip) + xfs_dir_update_hook(target_dp, target_ip, -1, target_name); + xfs_dir_update_hook(src_dp, src_ip, -1, src_name); + xfs_dir_update_hook(target_dp, src_ip, 1, target_name); + if (du_wip->ip) + xfs_dir_update_hook(src_dp, du_wip->ip, 1, src_name); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 6dbe6e9ecb49..576068ed81fa 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -74,7 +74,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name); + const struct xfs_name *name); int xfs_dir_lookup_args(struct xfs_da_args *args); int xfs_dir_createname_args(struct xfs_da_args *args); @@ -309,4 +309,51 @@ static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c) return c; } +struct xfs_dir_update_params { + const struct xfs_inode *dp; + const struct xfs_inode *ip; + const struct xfs_name *name; + int delta; +}; + +#ifdef CONFIG_XFS_LIVE_HOOKS +void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip, + int delta, const struct xfs_name *name); + +struct xfs_dir_hook { + struct xfs_hook dirent_hook; +}; + +void xfs_dir_hook_disable(void); +void xfs_dir_hook_enable(void); + +int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook); +void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook); +void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn); +#else +# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + +struct xfs_parent_args; + +struct xfs_dir_update { + struct xfs_inode *dp; + const struct xfs_name *name; + struct xfs_inode *ip; + struct xfs_parent_args *ppargs; +}; + +int xfs_dir_create_child(struct xfs_trans *tp, unsigned int resblks, + struct xfs_dir_update *du); +int xfs_dir_add_child(struct xfs_trans *tp, unsigned int resblks, + struct xfs_dir_update *du); +int xfs_dir_remove_child(struct xfs_trans *tp, unsigned int resblks, + struct xfs_dir_update *du); + +int xfs_dir_exchange_children(struct xfs_trans *tp, struct xfs_dir_update *du1, + struct xfs_dir_update *du2, unsigned int spaceres); +int xfs_dir_rename_children(struct xfs_trans *tp, struct xfs_dir_update *du_src, + struct xfs_dir_update *du_tgt, unsigned int spaceres, + struct xfs_dir_update *du_wip); + #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 61f51becff4f..e1bfee0c3b1a 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -90,8 +90,7 @@ struct xfs_ifork; #define XFSLABEL_MAX 12 /* - * Superblock - in core version. Must match the ondisk version below. - * Must be padded to 64 bit alignment. + * Superblock - in core version. Must be padded to 64 bit alignment. */ typedef struct xfs_sb { uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ @@ -178,10 +177,8 @@ typedef struct xfs_sb { /* must be padded to 64 bit alignment */ } xfs_sb_t; -#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) - /* - * Superblock - on disk version. Must match the in core version above. + * Superblock - on disk version. * Must be padded to 64 bit alignment. */ struct xfs_dsb { @@ -265,6 +262,8 @@ struct xfs_dsb { /* must be padded to 64 bit alignment */ }; +#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc) + /* * Misc. Flags - warning - these will be cleared by xfs_repair unless * a feature bit is set when the flag is used. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 14c81f227c5b..f8d5ed7aedde 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1946,6 +1946,21 @@ retry: } return -ENOSPC; } + + /* + * Protect against obviously corrupt allocation btree records. Later + * xfs_iget checks will catch re-allocation of other active in-memory + * and on-disk inodes. If we don't catch reallocating the parent inode + * here we will deadlock in xfs_iget() so we have to do these checks + * first. + */ + if (ino == parent || !xfs_verify_dir_ino(mp, ino)) { + xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); + xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), + XFS_SICK_AG_INOBT); + return -EFSCORRUPTED; + } + *new_ino = ino; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c new file mode 100644 index 000000000000..032333289113 --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -0,0 +1,749 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#include <linux/iversion.h> +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_inode_util.h" +#include "xfs_trans.h" +#include "xfs_ialloc.h" +#include "xfs_health.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_ag.h" +#include "xfs_iunlink_item.h" +#include "xfs_inode_item.h" + +uint16_t +xfs_flags2diflags( + struct xfs_inode *ip, + unsigned int xflags) +{ + /* can't set PREALLOC this way, just preserve it */ + uint16_t di_flags = + (ip->i_diflags & XFS_DIFLAG_PREALLOC); + + if (xflags & FS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; + if (xflags & FS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & FS_XFLAG_SYNC) + di_flags |= XFS_DIFLAG_SYNC; + if (xflags & FS_XFLAG_NOATIME) + di_flags |= XFS_DIFLAG_NOATIME; + if (xflags & FS_XFLAG_NODUMP) + di_flags |= XFS_DIFLAG_NODUMP; + if (xflags & FS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (xflags & FS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + if (S_ISDIR(VFS_I(ip)->i_mode)) { + if (xflags & FS_XFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (xflags & FS_XFLAG_NOSYMLINKS) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (xflags & FS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + if (xflags & FS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + } else if (S_ISREG(VFS_I(ip)->i_mode)) { + if (xflags & FS_XFLAG_REALTIME) + di_flags |= XFS_DIFLAG_REALTIME; + if (xflags & FS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; + } + + return di_flags; +} + +uint64_t +xfs_flags2diflags2( + struct xfs_inode *ip, + unsigned int xflags) +{ + uint64_t di_flags2 = + (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | + XFS_DIFLAG2_BIGTIME | + XFS_DIFLAG2_NREXT64)); + + if (xflags & FS_XFLAG_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + if (xflags & FS_XFLAG_COWEXTSIZE) + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + + return di_flags2; +} + +uint32_t +xfs_ip2xflags( + struct xfs_inode *ip) +{ + uint32_t flags = 0; + + if (ip->i_diflags & XFS_DIFLAG_ANY) { + if (ip->i_diflags & XFS_DIFLAG_REALTIME) + flags |= FS_XFLAG_REALTIME; + if (ip->i_diflags & XFS_DIFLAG_PREALLOC) + flags |= FS_XFLAG_PREALLOC; + if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) + flags |= FS_XFLAG_IMMUTABLE; + if (ip->i_diflags & XFS_DIFLAG_APPEND) + flags |= FS_XFLAG_APPEND; + if (ip->i_diflags & XFS_DIFLAG_SYNC) + flags |= FS_XFLAG_SYNC; + if (ip->i_diflags & XFS_DIFLAG_NOATIME) + flags |= FS_XFLAG_NOATIME; + if (ip->i_diflags & XFS_DIFLAG_NODUMP) + flags |= FS_XFLAG_NODUMP; + if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) + flags |= FS_XFLAG_RTINHERIT; + if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) + flags |= FS_XFLAG_PROJINHERIT; + if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) + flags |= FS_XFLAG_NOSYMLINKS; + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) + flags |= FS_XFLAG_EXTSIZE; + if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) + flags |= FS_XFLAG_EXTSZINHERIT; + if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) + flags |= FS_XFLAG_NODEFRAG; + if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) + flags |= FS_XFLAG_FILESTREAM; + } + + if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { + if (ip->i_diflags2 & XFS_DIFLAG2_DAX) + flags |= FS_XFLAG_DAX; + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) + flags |= FS_XFLAG_COWEXTSIZE; + } + + if (xfs_inode_has_attr_fork(ip)) + flags |= FS_XFLAG_HASATTR; + return flags; +} + +prid_t +xfs_get_initial_prid(struct xfs_inode *dp) +{ + if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) + return dp->i_projid; + + /* Assign to the root project by default. */ + return 0; +} + +/* Propagate di_flags from a parent inode to a child inode. */ +static inline void +xfs_inode_inherit_flags( + struct xfs_inode *ip, + const struct xfs_inode *pip) +{ + unsigned int di_flags = 0; + xfs_failaddr_t failaddr; + umode_t mode = VFS_I(ip)->i_mode; + + if (S_ISDIR(mode)) { + if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + ip->i_extsize = pip->i_extsize; + } + if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + } else if (S_ISREG(mode)) { + if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && + xfs_has_realtime(ip->i_mount)) + di_flags |= XFS_DIFLAG_REALTIME; + if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { + di_flags |= XFS_DIFLAG_EXTSIZE; + ip->i_extsize = pip->i_extsize; + } + } + if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && + xfs_inherit_noatime) + di_flags |= XFS_DIFLAG_NOATIME; + if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && + xfs_inherit_nodump) + di_flags |= XFS_DIFLAG_NODUMP; + if ((pip->i_diflags & XFS_DIFLAG_SYNC) && + xfs_inherit_sync) + di_flags |= XFS_DIFLAG_SYNC; + if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && + xfs_inherit_nosymlinks) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && + xfs_inherit_nodefrag) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + + ip->i_diflags |= di_flags; + + /* + * Inode verifiers on older kernels only check that the extent size + * hint is an integer multiple of the rt extent size on realtime files. + * They did not check the hint alignment on a directory with both + * rtinherit and extszinherit flags set. If the misaligned hint is + * propagated from a directory into a new realtime file, new file + * allocations will fail due to math errors in the rt allocator and/or + * trip the verifiers. Validate the hint settings in the new file so + * that we don't let broken hints propagate. + */ + failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, + VFS_I(ip)->i_mode, ip->i_diflags); + if (failaddr) { + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + ip->i_extsize = 0; + } +} + +/* Propagate di_flags2 from a parent inode to a child inode. */ +static inline void +xfs_inode_inherit_flags2( + struct xfs_inode *ip, + const struct xfs_inode *pip) +{ + xfs_failaddr_t failaddr; + + if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { + ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; + ip->i_cowextsize = pip->i_cowextsize; + } + if (pip->i_diflags2 & XFS_DIFLAG2_DAX) + ip->i_diflags2 |= XFS_DIFLAG2_DAX; + + /* Don't let invalid cowextsize hints propagate. */ + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, + VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); + if (failaddr) { + ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; + ip->i_cowextsize = 0; + } +} + +/* + * If we need to create attributes immediately after allocating the inode, + * initialise an empty attribute fork right now. We use the default fork offset + * for attributes here as we don't know exactly what size or how many + * attributes we might be adding. We can do this safely here because we know + * the data fork is completely empty and this saves us from needing to run a + * separate transaction to set the fork offset in the immediate future. + * + * If we have parent pointers and the caller hasn't told us that the file will + * never be linked into a directory tree, we /must/ create the attr fork. + */ +static inline bool +xfs_icreate_want_attrfork( + struct xfs_mount *mp, + const struct xfs_icreate_args *args) +{ + if (args->flags & XFS_ICREATE_INIT_XATTRS) + return true; + + if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp)) + return true; + + return false; +} + +/* Initialise an inode's attributes. */ +void +xfs_inode_init( + struct xfs_trans *tp, + const struct xfs_icreate_args *args, + struct xfs_inode *ip) +{ + struct xfs_inode *pip = args->pip; + struct inode *dir = pip ? VFS_I(pip) : NULL; + struct xfs_mount *mp = tp->t_mountp; + struct inode *inode = VFS_I(ip); + unsigned int flags; + int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG | + XFS_ICHGTIME_ACCESS; + + if (args->flags & XFS_ICREATE_TMPFILE) + set_nlink(inode, 0); + else if (S_ISDIR(args->mode)) + set_nlink(inode, 2); + else + set_nlink(inode, 1); + inode->i_rdev = args->rdev; + + if (!args->idmap || pip == NULL) { + /* creating a tree root, sb rooted, or detached file */ + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + ip->i_projid = 0; + inode->i_mode = args->mode; + } else { + /* creating a child in the directory tree */ + if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { + inode_fsuid_set(inode, args->idmap); + inode->i_gid = dir->i_gid; + inode->i_mode = args->mode; + } else { + inode_init_owner(args->idmap, inode, dir, args->mode); + } + + /* + * If the group ID of the new file does not match the effective + * group ID or one of the supplementary group IDs, the S_ISGID + * bit is cleared (and only if the irix_sgid_inherit + * compatibility variable is set). + */ + if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && + !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode))) + inode->i_mode &= ~S_ISGID; + + ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0; + } + + ip->i_disk_size = 0; + ip->i_df.if_nextents = 0; + ASSERT(ip->i_nblocks == 0); + + ip->i_extsize = 0; + ip->i_diflags = 0; + + if (xfs_has_v3inodes(mp)) { + inode_set_iversion(inode, 1); + ip->i_cowextsize = 0; + times |= XFS_ICHGTIME_CREATE; + } + + xfs_trans_ichgtime(tp, ip, times); + + flags = XFS_ILOG_CORE; + switch (args->mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + ip->i_df.if_format = XFS_DINODE_FMT_DEV; + flags |= XFS_ILOG_DEV; + break; + case S_IFREG: + case S_IFDIR: + if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) + xfs_inode_inherit_flags(ip, pip); + if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) + xfs_inode_inherit_flags2(ip, pip); + fallthrough; + case S_IFLNK: + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; + ip->i_df.if_bytes = 0; + ip->i_df.if_data = NULL; + break; + default: + ASSERT(0); + } + + if (xfs_icreate_want_attrfork(mp, args)) { + ip->i_forkoff = xfs_default_attroffset(ip) >> 3; + xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); + + if (!xfs_has_attr(mp)) { + spin_lock(&mp->m_sb_lock); + xfs_add_attr(mp); + spin_unlock(&mp->m_sb_lock); + xfs_log_sb(tp); + } + } + + xfs_trans_log_inode(tp, ip, flags); +} + +/* + * In-Core Unlinked List Lookups + * ============================= + * + * Every inode is supposed to be reachable from some other piece of metadata + * with the exception of the root directory. Inodes with a connection to a + * file descriptor but not linked from anywhere in the on-disk directory tree + * are collectively known as unlinked inodes, though the filesystem itself + * maintains links to these inodes so that on-disk metadata are consistent. + * + * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI + * header contains a number of buckets that point to an inode, and each inode + * record has a pointer to the next inode in the hash chain. This + * singly-linked list causes scaling problems in the iunlink remove function + * because we must walk that list to find the inode that points to the inode + * being removed from the unlinked hash bucket list. + * + * Hence we keep an in-memory double linked list to link each inode on an + * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer + * based lists would require having 64 list heads in the perag, one for each + * list. This is expensive in terms of memory (think millions of AGs) and cache + * misses on lookups. Instead, use the fact that inodes on the unlinked list + * must be referenced at the VFS level to keep them on the list and hence we + * have an existence guarantee for inodes on the unlinked list. + * + * Given we have an existence guarantee, we can use lockless inode cache lookups + * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode + * for the double linked unlinked list, and we don't need any extra locking to + * keep the list safe as all manipulations are done under the AGI buffer lock. + * Keeping the list up to date does not require memory allocation, just finding + * the XFS inode and updating the next/prev unlinked list aginos. + */ + +/* + * Update the prev pointer of the next agino. Returns -ENOLINK if the inode + * is not in cache. + */ +static int +xfs_iunlink_update_backref( + struct xfs_perag *pag, + xfs_agino_t prev_agino, + xfs_agino_t next_agino) +{ + struct xfs_inode *ip; + + /* No update necessary if we are at the end of the list. */ + if (next_agino == NULLAGINO) + return 0; + + ip = xfs_iunlink_lookup(pag, next_agino); + if (!ip) + return -ENOLINK; + + ip->i_prev_unlinked = prev_agino; + return 0; +} + +/* + * Point the AGI unlinked bucket at an inode and log the results. The caller + * is responsible for validating the old value. + */ +STATIC int +xfs_iunlink_update_bucket( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agibp, + unsigned int bucket_index, + xfs_agino_t new_agino) +{ + struct xfs_agi *agi = agibp->b_addr; + xfs_agino_t old_value; + int offset; + + ASSERT(xfs_verify_agino_or_null(pag, new_agino)); + + old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); + trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, + old_value, new_agino); + + /* + * We should never find the head of the list already set to the value + * passed in because either we're adding or removing ourselves from the + * head of the list. + */ + if (old_value == new_agino) { + xfs_buf_mark_corrupt(agibp); + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + return -EFSCORRUPTED; + } + + agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); + offset = offsetof(struct xfs_agi, agi_unlinked) + + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); + return 0; +} + +static int +xfs_iunlink_insert_inode( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agibp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = agibp->b_addr; + xfs_agino_t next_agino; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; + int error; + + /* + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the pointer isn't garbage and that this inode + * isn't already on the list. + */ + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || + !xfs_verify_agino_or_null(pag, next_agino)) { + xfs_buf_mark_corrupt(agibp); + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + return -EFSCORRUPTED; + } + + /* + * Update the prev pointer in the next inode to point back to this + * inode. + */ + error = xfs_iunlink_update_backref(pag, agino, next_agino); + if (error == -ENOLINK) + error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); + if (error) + return error; + + if (next_agino != NULLAGINO) { + /* + * There is already another inode in the bucket, so point this + * inode to the current head of the list. + */ + error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); + if (error) + return error; + ip->i_next_unlinked = next_agino; + } + + /* Point the head of the list to point to this inode. */ + ip->i_prev_unlinked = NULLAGINO; + return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); +} + +/* + * This is called when the inode's link count has gone to 0 or we are creating + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. + * + * We place the on-disk inode on a list in the AGI. It will be pulled from this + * list when the inode is freed. + */ +int +xfs_iunlink( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + struct xfs_buf *agibp; + int error; + + ASSERT(VFS_I(ip)->i_nlink == 0); + ASSERT(VFS_I(ip)->i_mode != 0); + trace_xfs_iunlink(ip); + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(pag, tp, 0, &agibp); + if (error) + goto out; + + error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); +out: + xfs_perag_put(pag); + return error; +} + +static int +xfs_iunlink_remove_inode( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agibp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = agibp->b_addr; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + xfs_agino_t head_agino; + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; + int error; + + trace_xfs_iunlink_remove(ip); + + /* + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the head pointer isn't garbage. + */ + head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (!xfs_verify_agino(pag, head_agino)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + agi, sizeof(*agi)); + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + return -EFSCORRUPTED; + } + + /* + * Set our inode's next_unlinked pointer to NULL and then return + * the old pointer value so that we can update whatever was previous + * to us in the list to point to whatever was next in the list. + */ + error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); + if (error) + return error; + + /* + * Update the prev pointer in the next inode to point back to previous + * inode in the chain. + */ + error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, + ip->i_next_unlinked); + if (error == -ENOLINK) + error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, + ip->i_next_unlinked); + if (error) + return error; + + if (head_agino != agino) { + struct xfs_inode *prev_ip; + + prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); + if (!prev_ip) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); + return -EFSCORRUPTED; + } + + error = xfs_iunlink_log_inode(tp, prev_ip, pag, + ip->i_next_unlinked); + prev_ip->i_next_unlinked = ip->i_next_unlinked; + } else { + /* Point the head of the list to the next unlinked inode. */ + error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, + ip->i_next_unlinked); + } + + ip->i_next_unlinked = NULLAGINO; + ip->i_prev_unlinked = 0; + return error; +} + +/* + * Pull the on-disk inode from the AGI unlinked list. + */ +int +xfs_iunlink_remove( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + struct xfs_buf *agibp; + int error; + + trace_xfs_iunlink_remove(ip); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(pag, tp, 0, &agibp); + if (error) + return error; + + return xfs_iunlink_remove_inode(tp, pag, agibp, ip); +} + +/* + * Decrement the link count on an inode & log the change. If this causes the + * link count to go to zero, move the inode to AGI unlinked list so that it can + * be freed when the last active reference goes away via xfs_inactive(). + */ +int +xfs_droplink( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + + if (inode->i_nlink == 0) { + xfs_info_ratelimited(tp->t_mountp, + "Inode 0x%llx link count dropped below zero. Pinning link count.", + ip->i_ino); + set_nlink(inode, XFS_NLINK_PINNED); + } + if (inode->i_nlink != XFS_NLINK_PINNED) + drop_nlink(inode); + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + if (inode->i_nlink) + return 0; + + return xfs_iunlink(tp, ip); +} + +/* + * Increment the link count on an inode & log the change. + */ +void +xfs_bumplink( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + + if (inode->i_nlink == XFS_NLINK_PINNED - 1) + xfs_info_ratelimited(tp->t_mountp, + "Inode 0x%llx link count exceeded maximum. Pinning link count.", + ip->i_ino); + if (inode->i_nlink != XFS_NLINK_PINNED) + inc_nlink(inode); + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} + +/* Free an inode in the ondisk index and zero it out. */ +int +xfs_inode_uninit( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_inode *ip, + struct xfs_icluster *xic) +{ + struct xfs_mount *mp = ip->i_mount; + int error; + + /* + * Free the inode first so that we guarantee that the AGI lock is going + * to be taken before we remove the inode from the unlinked list. This + * makes the AGI lock -> unlinked list modification order the same as + * used in O_TMPFILE creation. + */ + error = xfs_difree(tp, pag, ip->i_ino, xic); + if (error) + return error; + + error = xfs_iunlink_remove(tp, pag, ip); + if (error) + return error; + + /* + * Free any local-format data sitting around before we reset the + * data fork to extents format. Note that the attr fork data has + * already been freed by xfs_attr_inactive. + */ + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { + kfree(ip->i_df.if_data); + ip->i_df.if_data = NULL; + ip->i_df.if_bytes = 0; + } + + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ + ip->i_diflags = 0; + ip->i_diflags2 = mp->m_ino_geo.new_diflags2; + ip->i_forkoff = 0; /* mark the attr fork not in use */ + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; + + /* + * Bump the generation count so no one will be confused + * by reincarnations of this inode. + */ + VFS_I(ip)->i_generation++; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_inode_util.h b/fs/xfs/libxfs/xfs_inode_util.h new file mode 100644 index 000000000000..060242998a23 --- /dev/null +++ b/fs/xfs/libxfs/xfs_inode_util.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_INODE_UTIL_H__ +#define __XFS_INODE_UTIL_H__ + +struct xfs_icluster; + +uint16_t xfs_flags2diflags(struct xfs_inode *ip, unsigned int xflags); +uint64_t xfs_flags2diflags2(struct xfs_inode *ip, unsigned int xflags); +uint32_t xfs_dic2xflags(struct xfs_inode *ip); +uint32_t xfs_ip2xflags(struct xfs_inode *ip); + +prid_t xfs_get_initial_prid(struct xfs_inode *dp); + +/* + * File creation context. + * + * Due to our only partial reliance on the VFS to propagate uid and gid values + * according to accepted Unix behaviors, callers must initialize idmap to the + * correct idmapping structure to get the correct inheritance behaviors when + * XFS_MOUNT_GRPID is set. + * + * To create files detached from the directory tree (e.g. quota inodes), set + * idmap to NULL. To create a tree root, set pip to NULL. + */ +struct xfs_icreate_args { + struct mnt_idmap *idmap; + struct xfs_inode *pip; /* parent inode or null */ + dev_t rdev; + umode_t mode; + +#define XFS_ICREATE_TMPFILE (1U << 0) /* create an unlinked file */ +#define XFS_ICREATE_INIT_XATTRS (1U << 1) /* will set xattrs immediately */ +#define XFS_ICREATE_UNLINKABLE (1U << 2) /* cannot link into dir tree */ + uint16_t flags; +}; + +/* + * Flags for xfs_trans_ichgtime(). + */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ +#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ +#define XFS_ICHGTIME_ACCESS 0x8 /* last access timestamp */ +void xfs_trans_ichgtime(struct xfs_trans *tp, struct xfs_inode *ip, int flags); + +void xfs_inode_init(struct xfs_trans *tp, const struct xfs_icreate_args *args, + struct xfs_inode *ip); + +int xfs_inode_uninit(struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_inode *ip, struct xfs_icluster *xic); + +int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip); +int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_inode *ip); +int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip); +void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); + +#endif /* __XFS_INODE_UTIL_H__ */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index e8cdd77d03fa..23c133fd36f5 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -85,6 +85,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12); */ + XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224); XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0); XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2); XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 34f104ed372c..2f7413afbf46 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -177,13 +177,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_REFC_BTREE_REF 1 #define XFS_SSB_REF 0 -/* - * Flags for xfs_trans_ichgtime(). - */ -#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ -#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ - /* Computed inode geometry for the filesystem. */ struct xfs_ino_geometry { /* Maximum inode count in this filesystem. */ diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 69fc5b981352..3c40f37e82c7 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -68,6 +68,8 @@ xfs_trans_ichgtime( inode_set_mtime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_CHG) inode_set_ctime_to_ts(inode, tv); + if (flags & XFS_ICHGTIME_ACCESS) + inode_set_atime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_CREATE) ip->i_crtime = tv; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 1ad8ec63a7f4..22f5f1a9d3f0 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -26,6 +26,7 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" +#include "xfs_dir2.h" #include "xfs_attr.h" #include "xfs_reflink.h" #include "xfs_ag.h" diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index b747b625c5ee..d390d56cd875 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -40,11 +40,16 @@ xrep_tempfile_create( struct xfs_scrub *sc, uint16_t mode) { + struct xfs_icreate_args args = { + .pip = sc->mp->m_rootip, + .mode = mode, + .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE, + }; struct xfs_mount *mp = sc->mp; struct xfs_trans *tp = NULL; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; struct xfs_trans_res *tres; struct xfs_inode *dp = mp->m_rootip; xfs_ino_t ino; @@ -65,8 +70,7 @@ xrep_tempfile_create( * inode should be completely root owned so that we don't fail due to * quota limits. */ - error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, - XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); + error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -87,14 +91,11 @@ xrep_tempfile_create( error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (error) goto out_trans_cancel; - error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0, - 0, false, &sc->tempip); + error = xfs_icreate(tp, ino, &args, &sc->tempip); if (error) goto out_trans_cancel; - /* Change the ownership of the inode to root. */ - VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID; - VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID; + /* We don't touch file data, so drop the realtime flags. */ sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index f6ffb4f248f7..9355ccad9503 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -10,6 +10,10 @@ #define DEBUG 1 #endif +#ifdef CONFIG_XFS_DEBUG_EXPENSIVE +#define DEBUG_EXPENSIVE 1 +#endif + #ifdef CONFIG_XFS_ASSERT_FATAL #define XFS_ASSERT_FATAL 1 #endif diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 43031842341a..47549cfa61cd 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -22,6 +22,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_error.h" struct kmem_cache *xfs_buf_item_cache; @@ -781,8 +782,39 @@ xfs_buf_item_committed( return lsn; } +#ifdef DEBUG_EXPENSIVE +static int +xfs_buf_item_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_buf_log_item *bip = BUF_ITEM(lip); + struct xfs_buf *bp = bip->bli_buf; + struct xfs_mount *mp = bp->b_mount; + xfs_failaddr_t fa; + + if (!bp->b_ops || !bp->b_ops->verify_struct) + return 0; + if (bip->bli_flags & XFS_BLI_STALE) + return 0; + + fa = bp->b_ops->verify_struct(bp); + if (fa) { + xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name, + bp->b_addr, BBTOB(bp->b_length), fa); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + ASSERT(fa == NULL); + } + + return 0; +} +#else +# define xfs_buf_item_precommit NULL +#endif + static const struct xfs_item_ops xfs_buf_item_ops = { .iop_size = xfs_buf_item_size, + .iop_precommit = xfs_buf_item_precommit, .iop_format = xfs_buf_item_format, .iop_pin = xfs_buf_item_pin, .iop_unpin = xfs_buf_item_unpin, diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 6a1aae799cf1..7d19091215b0 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -17,6 +17,7 @@ #include "xfs_trans_priv.h" #include "xfs_qm.h" #include "xfs_log.h" +#include "xfs_error.h" static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) { @@ -193,8 +194,38 @@ xfs_qm_dquot_logitem_committing( return xfs_qm_dquot_logitem_release(lip); } +#ifdef DEBUG_EXPENSIVE +static int +xfs_qm_dquot_logitem_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + struct xfs_mount *mp = dqp->q_mount; + struct xfs_disk_dquot ddq = { }; + xfs_failaddr_t fa; + + xfs_dquot_to_disk(&ddq, dqp); + fa = xfs_dquot_verify(mp, &ddq, dqp->q_id); + if (fa) { + XFS_CORRUPTION_ERROR("Bad dquot during logging", + XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq)); + xfs_alert(mp, + "Metadata corruption detected at %pS, dquot 0x%x", + fa, dqp->q_id); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + ASSERT(fa == NULL); + } + + return 0; +} +#else +# define xfs_qm_dquot_logitem_precommit NULL +#endif + static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = xfs_qm_dquot_logitem_size, + .iop_precommit = xfs_qm_dquot_logitem_precommit, .iop_format = xfs_qm_dquot_logitem_format, .iop_pin = xfs_qm_dquot_logitem_pin, .iop_unpin = xfs_qm_dquot_logitem_unpin, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4e3cd8971fc..62ca6c75117c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -42,55 +42,11 @@ #include "xfs_pnfs.h" #include "xfs_parent.h" #include "xfs_xattr.h" -#include "xfs_sb.h" +#include "xfs_inode_util.h" struct kmem_cache *xfs_inode_cache; /* - * helper function to extract extent size hint from inode - */ -xfs_extlen_t -xfs_get_extsz_hint( - struct xfs_inode *ip) -{ - /* - * No point in aligning allocations if we need to COW to actually - * write to them. - */ - if (xfs_is_always_cow_inode(ip)) - return 0; - if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) - return ip->i_extsize; - if (XFS_IS_REALTIME_INODE(ip) && - ip->i_mount->m_sb.sb_rextsize > 1) - return ip->i_mount->m_sb.sb_rextsize; - return 0; -} - -/* - * Helper function to extract CoW extent size hint from inode. - * Between the extent size hint and the CoW extent size hint, we - * return the greater of the two. If the value is zero (automatic), - * use the default size. - */ -xfs_extlen_t -xfs_get_cowextsz_hint( - struct xfs_inode *ip) -{ - xfs_extlen_t a, b; - - a = 0; - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) - a = ip->i_cowextsize; - b = xfs_get_extsz_hint(ip); - - a = max(a, b); - if (a == 0) - return XFS_DEFAULT_COWEXTSZ_HINT; - return a; -} - -/* * These two are wrapper routines around the xfs_ilock() routine used to * centralize some grungy code. They are used in places that wish to lock the * inode solely for reading the extents. The reason these places can't just @@ -567,55 +523,6 @@ xfs_lock_two_inodes( } } -uint -xfs_ip2xflags( - struct xfs_inode *ip) -{ - uint flags = 0; - - if (ip->i_diflags & XFS_DIFLAG_ANY) { - if (ip->i_diflags & XFS_DIFLAG_REALTIME) - flags |= FS_XFLAG_REALTIME; - if (ip->i_diflags & XFS_DIFLAG_PREALLOC) - flags |= FS_XFLAG_PREALLOC; - if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_XFLAG_IMMUTABLE; - if (ip->i_diflags & XFS_DIFLAG_APPEND) - flags |= FS_XFLAG_APPEND; - if (ip->i_diflags & XFS_DIFLAG_SYNC) - flags |= FS_XFLAG_SYNC; - if (ip->i_diflags & XFS_DIFLAG_NOATIME) - flags |= FS_XFLAG_NOATIME; - if (ip->i_diflags & XFS_DIFLAG_NODUMP) - flags |= FS_XFLAG_NODUMP; - if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) - flags |= FS_XFLAG_RTINHERIT; - if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) - flags |= FS_XFLAG_PROJINHERIT; - if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) - flags |= FS_XFLAG_NOSYMLINKS; - if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) - flags |= FS_XFLAG_EXTSIZE; - if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) - flags |= FS_XFLAG_EXTSZINHERIT; - if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) - flags |= FS_XFLAG_NODEFRAG; - if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) - flags |= FS_XFLAG_FILESTREAM; - } - - if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { - if (ip->i_diflags2 & XFS_DIFLAG2_DAX) - flags |= FS_XFLAG_DAX; - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) - flags |= FS_XFLAG_COWEXTSIZE; - } - - if (xfs_inode_has_attr_fork(ip)) - flags |= FS_XFLAG_HASATTR; - return flags; -} - /* * Lookups up an inode from "name". If ci_name is not NULL, then a CI match * is allowed, otherwise it has to be an exact match. If a CI match is found, @@ -657,97 +564,6 @@ out_unlock: return error; } -/* Propagate di_flags from a parent inode to a child inode. */ -static void -xfs_inode_inherit_flags( - struct xfs_inode *ip, - const struct xfs_inode *pip) -{ - unsigned int di_flags = 0; - xfs_failaddr_t failaddr; - umode_t mode = VFS_I(ip)->i_mode; - - if (S_ISDIR(mode)) { - if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - ip->i_extsize = pip->i_extsize; - } - if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - } else if (S_ISREG(mode)) { - if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && - xfs_has_realtime(ip->i_mount)) - di_flags |= XFS_DIFLAG_REALTIME; - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSIZE; - ip->i_extsize = pip->i_extsize; - } - } - if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && - xfs_inherit_noatime) - di_flags |= XFS_DIFLAG_NOATIME; - if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && - xfs_inherit_nodump) - di_flags |= XFS_DIFLAG_NODUMP; - if ((pip->i_diflags & XFS_DIFLAG_SYNC) && - xfs_inherit_sync) - di_flags |= XFS_DIFLAG_SYNC; - if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && - xfs_inherit_nosymlinks) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && - xfs_inherit_nodefrag) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - - ip->i_diflags |= di_flags; - - /* - * Inode verifiers on older kernels only check that the extent size - * hint is an integer multiple of the rt extent size on realtime files. - * They did not check the hint alignment on a directory with both - * rtinherit and extszinherit flags set. If the misaligned hint is - * propagated from a directory into a new realtime file, new file - * allocations will fail due to math errors in the rt allocator and/or - * trip the verifiers. Validate the hint settings in the new file so - * that we don't let broken hints propagate. - */ - failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, - VFS_I(ip)->i_mode, ip->i_diflags); - if (failaddr) { - ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | - XFS_DIFLAG_EXTSZINHERIT); - ip->i_extsize = 0; - } -} - -/* Propagate di_flags2 from a parent inode to a child inode. */ -static void -xfs_inode_inherit_flags2( - struct xfs_inode *ip, - const struct xfs_inode *pip) -{ - xfs_failaddr_t failaddr; - - if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; - ip->i_cowextsize = pip->i_cowextsize; - } - if (pip->i_diflags2 & XFS_DIFLAG2_DAX) - ip->i_diflags2 |= XFS_DIFLAG2_DAX; - - /* Don't let invalid cowextsize hints propagate. */ - failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, - VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); - if (failaddr) { - ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; - ip->i_cowextsize = 0; - } -} - /* * Initialise a newly allocated inode and return the in-core inode to the * caller locked exclusively. @@ -755,39 +571,15 @@ xfs_inode_inherit_flags2( * Caller is responsible for unlocking the inode manually upon return */ int -xfs_init_new_inode( - struct mnt_idmap *idmap, +xfs_icreate( struct xfs_trans *tp, - struct xfs_inode *pip, xfs_ino_t ino, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - bool init_xattrs, + const struct xfs_icreate_args *args, struct xfs_inode **ipp) { - struct inode *dir = pip ? VFS_I(pip) : NULL; struct xfs_mount *mp = tp->t_mountp; - struct xfs_inode *ip; - unsigned int flags; + struct xfs_inode *ip = NULL; int error; - struct timespec64 tv; - struct inode *inode; - - /* - * Protect against obviously corrupt allocation btree records. Later - * xfs_iget checks will catch re-allocation of other active in-memory - * and on-disk inodes. If we don't catch reallocating the parent inode - * here we will deadlock in xfs_iget() so we have to do these checks - * first. - */ - if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) { - xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); - xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), - XFS_SICK_AG_INOBT); - return -EFSCORRUPTED; - } /* * Get the in-core inode with the lock held exclusively to prevent @@ -798,96 +590,8 @@ xfs_init_new_inode( return error; ASSERT(ip != NULL); - inode = VFS_I(ip); - set_nlink(inode, nlink); - inode->i_rdev = rdev; - ip->i_projid = prid; - - if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { - inode_fsuid_set(inode, idmap); - inode->i_gid = dir->i_gid; - inode->i_mode = mode; - } else { - inode_init_owner(idmap, inode, dir, mode); - } - - /* - * If the group ID of the new file does not match the effective group - * ID or one of the supplementary group IDs, the S_ISGID bit is cleared - * (and only if the irix_sgid_inherit compatibility variable is set). - */ - if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && - !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) - inode->i_mode &= ~S_ISGID; - - ip->i_disk_size = 0; - ip->i_df.if_nextents = 0; - ASSERT(ip->i_nblocks == 0); - - tv = inode_set_ctime_current(inode); - inode_set_mtime_to_ts(inode, tv); - inode_set_atime_to_ts(inode, tv); - - ip->i_extsize = 0; - ip->i_diflags = 0; - - if (xfs_has_v3inodes(mp)) { - inode_set_iversion(inode, 1); - ip->i_cowextsize = 0; - ip->i_crtime = tv; - } - - flags = XFS_ILOG_CORE; - switch (mode & S_IFMT) { - case S_IFIFO: - case S_IFCHR: - case S_IFBLK: - case S_IFSOCK: - ip->i_df.if_format = XFS_DINODE_FMT_DEV; - flags |= XFS_ILOG_DEV; - break; - case S_IFREG: - case S_IFDIR: - if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) - xfs_inode_inherit_flags(ip, pip); - if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) - xfs_inode_inherit_flags2(ip, pip); - fallthrough; - case S_IFLNK: - ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; - ip->i_df.if_bytes = 0; - ip->i_df.if_data = NULL; - break; - default: - ASSERT(0); - } - - /* - * If we need to create attributes immediately after allocating the - * inode, initialise an empty attribute fork right now. We use the - * default fork offset for attributes here as we don't know exactly what - * size or how many attributes we might be adding. We can do this - * safely here because we know the data fork is completely empty and - * this saves us from needing to run a separate transaction to set the - * fork offset in the immediate future. - */ - if (init_xattrs) { - ip->i_forkoff = xfs_default_attroffset(ip) >> 3; - xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); - - if (!xfs_has_attr(mp)) { - spin_lock(&mp->m_sb_lock); - xfs_add_attr(mp); - spin_unlock(&mp->m_sb_lock); - xfs_log_sb(tp); - } - } - - /* - * Log the new values stuffed into the inode. - */ xfs_trans_ijoin(tp, ip, 0); - xfs_trans_log_inode(tp, ip, flags); + xfs_inode_init(tp, args, ip); /* now that we have an i_mode we can setup the inode structure */ xfs_setup_inode(ip); @@ -896,158 +600,60 @@ xfs_init_new_inode( return 0; } -/* - * Decrement the link count on an inode & log the change. If this causes the - * link count to go to zero, move the inode to AGI unlinked list so that it can - * be freed when the last active reference goes away via xfs_inactive(). - */ +/* Return dquots for the ids that will be assigned to a new file. */ int -xfs_droplink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - - if (inode->i_nlink == 0) { - xfs_info_ratelimited(tp->t_mountp, - "Inode 0x%llx link count dropped below zero. Pinning link count.", - ip->i_ino); - set_nlink(inode, XFS_NLINK_PINNED); - } - if (inode->i_nlink != XFS_NLINK_PINNED) - drop_nlink(inode); - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - if (inode->i_nlink) - return 0; - - return xfs_iunlink(tp, ip); -} - -/* - * Increment the link count on an inode & log the change. - */ -void -xfs_bumplink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - - if (inode->i_nlink == XFS_NLINK_PINNED - 1) - xfs_info_ratelimited(tp->t_mountp, - "Inode 0x%llx link count exceeded maximum. Pinning link count.", - ip->i_ino); - if (inode->i_nlink != XFS_NLINK_PINNED) - inc_nlink(inode); - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -} - -#ifdef CONFIG_XFS_LIVE_HOOKS -/* - * Use a static key here to reduce the overhead of directory live update hooks. - * If the compiler supports jump labels, the static branch will be replaced by - * a nop sled when there are no hook users. Online fsck is currently the only - * caller, so this is a reasonable tradeoff. - * - * Note: Patching the kernel code requires taking the cpu hotplug lock. Other - * parts of the kernel allocate memory with that lock held, which means that - * XFS callers cannot hold any locks that might be used by memory reclaim or - * writeback when calling the static_branch_{inc,dec} functions. - */ -DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); - -void -xfs_dir_hook_disable(void) -{ - xfs_hooks_switch_off(&xfs_dir_hooks_switch); -} - -void -xfs_dir_hook_enable(void) -{ - xfs_hooks_switch_on(&xfs_dir_hooks_switch); -} - -/* Call hooks for a directory update relating to a child dirent update. */ -inline void -xfs_dir_update_hook( - struct xfs_inode *dp, - struct xfs_inode *ip, - int delta, - const struct xfs_name *name) -{ - if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { - struct xfs_dir_update_params p = { - .dp = dp, - .ip = ip, - .delta = delta, - .name = name, - }; - struct xfs_mount *mp = ip->i_mount; - - xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); +xfs_icreate_dqalloc( + const struct xfs_icreate_args *args, + struct xfs_dquot **udqpp, + struct xfs_dquot **gdqpp, + struct xfs_dquot **pdqpp) +{ + struct inode *dir = VFS_I(args->pip); + kuid_t uid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID; + prid_t prid = 0; + unsigned int flags = XFS_QMOPT_QUOTALL; + + if (args->idmap) { + /* + * The uid/gid computation code must match what the VFS uses to + * assign i_[ug]id. INHERIT adjusts the gid computation for + * setgid/grpid systems. + */ + uid = mapped_fsuid(args->idmap, i_user_ns(dir)); + gid = mapped_fsgid(args->idmap, i_user_ns(dir)); + prid = xfs_get_initial_prid(args->pip); + flags |= XFS_QMOPT_INHERIT; } -} -/* Call the specified function during a directory update. */ -int -xfs_dir_hook_add( - struct xfs_mount *mp, - struct xfs_dir_hook *hook) -{ - return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); -} + *udqpp = *gdqpp = *pdqpp = NULL; -/* Stop calling the specified function during a directory update. */ -void -xfs_dir_hook_del( - struct xfs_mount *mp, - struct xfs_dir_hook *hook) -{ - xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); + return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp, + gdqpp, pdqpp); } -/* Configure directory update hook functions. */ -void -xfs_dir_hook_setup( - struct xfs_dir_hook *hook, - notifier_fn_t mod_fn) -{ - xfs_hook_setup(&hook->dirent_hook, mod_fn); -} -#endif /* CONFIG_XFS_LIVE_HOOKS */ - int xfs_create( - struct mnt_idmap *idmap, - struct xfs_inode *dp, + const struct xfs_icreate_args *args, struct xfs_name *name, - umode_t mode, - dev_t rdev, - bool init_xattrs, - xfs_inode_t **ipp) + struct xfs_inode **ipp) { - int is_dir = S_ISDIR(mode); + struct xfs_inode *dp = args->pip; + struct xfs_dir_update du = { + .dp = dp, + .name = name, + }; struct xfs_mount *mp = dp->i_mount; - struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; - int error; - bool unlock_dp_on_error = false; - prid_t prid; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; struct xfs_trans_res *tres; - uint resblks; xfs_ino_t ino; - struct xfs_parent_args *ppargs; + bool unlock_dp_on_error = false; + bool is_dir = S_ISDIR(args->mode); + uint resblks; + int error; trace_xfs_create(dp, name); @@ -1056,15 +662,8 @@ xfs_create( if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) return -EIO; - prid = xfs_get_initial_prid(dp); - - /* - * Make sure that we have allocated dquot(s) on disk. - */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), - mapped_fsgid(idmap, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + /* Make sure that we have allocated dquot(s) on disk. */ + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -1076,7 +675,7 @@ xfs_create( tres = &M_RES(mp)->tr_create; } - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto out_release_dquots; @@ -1105,10 +704,9 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); if (!error) - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, - is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); + error = xfs_icreate(tp, ino, args, &du.ip); if (error) goto out_trans_cancel; @@ -1121,38 +719,9 @@ xfs_create( */ xfs_trans_ijoin(tp, dp, 0); - error = xfs_dir_createname(tp, dp, name, ip->i_ino, - resblks - XFS_IALLOC_SPACE_RES(mp)); - if (error) { - ASSERT(error != -ENOSPC); + error = xfs_dir_create_child(tp, resblks, &du); + if (error) goto out_trans_cancel; - } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - - if (is_dir) { - error = xfs_dir_init(tp, ip, dp); - if (error) - goto out_trans_cancel; - - xfs_bumplink(tp, dp); - } - - /* - * If we have parent pointers, we need to add the attribute containing - * the parent information now. - */ - if (ppargs) { - error = xfs_parent_addname(tp, ppargs, dp, name, ip); - if (error) - goto out_trans_cancel; - } - - /* - * Create ip with a reference from dp, and add '.' and '..' references - * if it's a directory. - */ - xfs_dir_update_hook(dp, ip, 1, name); /* * If this is a synchronous mount, make sure that the @@ -1167,7 +736,7 @@ xfs_create( * These ids of the inode couldn't have changed since the new * inode has been locked ever since it was created. */ - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); + xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp); error = xfs_trans_commit(tp); if (error) @@ -1177,10 +746,10 @@ xfs_create( xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); - *ipp = ip; - xfs_iunlock(ip, XFS_ILOCK_EXCL); + *ipp = du.ip; + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return 0; out_trans_cancel: @@ -1191,13 +760,13 @@ xfs_create( * setup of the inode and release the inode. This prevents recursive * transactions and deadlocks from xfs_inactive. */ - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_finish_inode_setup(ip); - xfs_irele(ip); + if (du.ip) { + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); + xfs_finish_inode_setup(du.ip); + xfs_irele(du.ip); } out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -1210,36 +779,28 @@ xfs_create( int xfs_create_tmpfile( - struct mnt_idmap *idmap, - struct xfs_inode *dp, - umode_t mode, - bool init_xattrs, + const struct xfs_icreate_args *args, struct xfs_inode **ipp) { + struct xfs_inode *dp = args->pip; struct xfs_mount *mp = dp->i_mount; struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; - int error; - prid_t prid; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; struct xfs_trans_res *tres; - uint resblks; xfs_ino_t ino; + uint resblks; + int error; + + ASSERT(args->flags & XFS_ICREATE_TMPFILE); if (xfs_is_shutdown(mp)) return -EIO; - prid = xfs_get_initial_prid(dp); - - /* - * Make sure that we have allocated dquot(s) on disk. - */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), - mapped_fsgid(idmap, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + /* Make sure that we have allocated dquot(s) on disk. */ + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -1251,10 +812,9 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); if (!error) - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, - 0, 0, prid, init_xattrs, &ip); + error = xfs_icreate(tp, ino, args, &ip); if (error) goto out_trans_cancel; @@ -1311,11 +871,15 @@ xfs_link( struct xfs_inode *sip, struct xfs_name *target_name) { + struct xfs_dir_update du = { + .dp = tdp, + .name = target_name, + .ip = sip, + }; struct xfs_mount *mp = tdp->i_mount; struct xfs_trans *tp; int error, nospace_error = 0; int resblks; - struct xfs_parent_args *ppargs; trace_xfs_link(tdp, target_name); @@ -1334,7 +898,7 @@ xfs_link( if (error) goto std_return; - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto std_return; @@ -1349,7 +913,7 @@ xfs_link( * pointers are enabled because we can't back out if the xattrs must * grow. */ - if (ppargs && nospace_error) { + if (du.ppargs && nospace_error) { error = nospace_error; goto error_return; } @@ -1376,47 +940,9 @@ xfs_link( } } - if (!resblks) { - error = xfs_dir_canenter(tp, tdp, target_name); - if (error) - goto error_return; - } - - /* - * Handle initial link state of O_TMPFILE inode - */ - if (VFS_I(sip)->i_nlink == 0) { - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino)); - error = xfs_iunlink_remove(tp, pag, sip); - xfs_perag_put(pag); - if (error) - goto error_return; - } - - error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, - resblks); + error = xfs_dir_add_child(tp, resblks, &du); if (error) goto error_return; - xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); - - xfs_bumplink(tp, sip); - - /* - * If we have parent pointers, we now need to add the parent record to - * the attribute fork of the inode. If this is the initial parent - * attribute, we need to create it correctly, otherwise we can just add - * the parent to the inode. - */ - if (ppargs) { - error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip); - if (error) - goto error_return; - } - - xfs_dir_update_hook(tdp, sip, 1, target_name); /* * If this is a synchronous mount, make sure that the @@ -1429,7 +955,7 @@ xfs_link( error = xfs_trans_commit(tp); xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return error; error_return: @@ -1437,7 +963,7 @@ xfs_link( xfs_iunlock(tdp, XFS_ILOCK_EXCL); xfs_iunlock(sip, XFS_ILOCK_EXCL); out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); std_return: if (error == -ENOSPC && nospace_error) error = nospace_error; @@ -2024,39 +1550,6 @@ out: } /* - * In-Core Unlinked List Lookups - * ============================= - * - * Every inode is supposed to be reachable from some other piece of metadata - * with the exception of the root directory. Inodes with a connection to a - * file descriptor but not linked from anywhere in the on-disk directory tree - * are collectively known as unlinked inodes, though the filesystem itself - * maintains links to these inodes so that on-disk metadata are consistent. - * - * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI - * header contains a number of buckets that point to an inode, and each inode - * record has a pointer to the next inode in the hash chain. This - * singly-linked list causes scaling problems in the iunlink remove function - * because we must walk that list to find the inode that points to the inode - * being removed from the unlinked hash bucket list. - * - * Hence we keep an in-memory double linked list to link each inode on an - * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer - * based lists would require having 64 list heads in the perag, one for each - * list. This is expensive in terms of memory (think millions of AGs) and cache - * misses on lookups. Instead, use the fact that inodes on the unlinked list - * must be referenced at the VFS level to keep them on the list and hence we - * have an existence guarantee for inodes on the unlinked list. - * - * Given we have an existence guarantee, we can use lockless inode cache lookups - * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode - * for the double linked unlinked list, and we don't need any extra locking to - * keep the list safe as all manipulations are done under the AGI buffer lock. - * Keeping the list up to date does not require memory allocation, just finding - * the XFS inode and updating the next/prev unlinked list aginos. - */ - -/* * Find an inode on the unlinked list. This does not take references to the * inode as we have existence guarantees by holding the AGI buffer lock and that * only unlinked, referenced inodes can be on the unlinked inode list. If we @@ -2091,75 +1584,11 @@ xfs_iunlink_lookup( } /* - * Update the prev pointer of the next agino. Returns -ENOLINK if the inode - * is not in cache. - */ -static int -xfs_iunlink_update_backref( - struct xfs_perag *pag, - xfs_agino_t prev_agino, - xfs_agino_t next_agino) -{ - struct xfs_inode *ip; - - /* No update necessary if we are at the end of the list. */ - if (next_agino == NULLAGINO) - return 0; - - ip = xfs_iunlink_lookup(pag, next_agino); - if (!ip) - return -ENOLINK; - - ip->i_prev_unlinked = prev_agino; - return 0; -} - -/* - * Point the AGI unlinked bucket at an inode and log the results. The caller - * is responsible for validating the old value. - */ -STATIC int -xfs_iunlink_update_bucket( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - unsigned int bucket_index, - xfs_agino_t new_agino) -{ - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t old_value; - int offset; - - ASSERT(xfs_verify_agino_or_null(pag, new_agino)); - - old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); - trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, - old_value, new_agino); - - /* - * We should never find the head of the list already set to the value - * passed in because either we're adding or removing ourselves from the - * head of the list. - */ - if (old_value == new_agino) { - xfs_buf_mark_corrupt(agibp); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); - offset = offsetof(struct xfs_agi, agi_unlinked) + - (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); - return 0; -} - -/* * Load the inode @next_agino into the cache and set its prev_unlinked pointer * to @prev_agino. Caller must hold the AGI to synchronize with other changes * to the unlinked list. */ -STATIC int +int xfs_iunlink_reload_next( struct xfs_trans *tp, struct xfs_buf *agibp, @@ -2215,187 +1644,6 @@ rele: return error; } -static int -xfs_iunlink_insert_inode( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t next_agino; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - int error; - - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the pointer isn't garbage and that this inode - * isn't already on the list. - */ - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (next_agino == agino || - !xfs_verify_agino_or_null(pag, next_agino)) { - xfs_buf_mark_corrupt(agibp); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - /* - * Update the prev pointer in the next inode to point back to this - * inode. - */ - error = xfs_iunlink_update_backref(pag, agino, next_agino); - if (error == -ENOLINK) - error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); - if (error) - return error; - - if (next_agino != NULLAGINO) { - /* - * There is already another inode in the bucket, so point this - * inode to the current head of the list. - */ - error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); - if (error) - return error; - ip->i_next_unlinked = next_agino; - } - - /* Point the head of the list to point to this inode. */ - ip->i_prev_unlinked = NULLAGINO; - return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); -} - -/* - * This is called when the inode's link count has gone to 0 or we are creating - * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. - * - * We place the on-disk inode on a list in the AGI. It will be pulled from this - * list when the inode is freed. - */ -int -xfs_iunlink( - struct xfs_trans *tp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; - struct xfs_buf *agibp; - int error; - - ASSERT(VFS_I(ip)->i_nlink == 0); - ASSERT(VFS_I(ip)->i_mode != 0); - trace_xfs_iunlink(ip); - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, 0, &agibp); - if (error) - goto out; - - error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); -out: - xfs_perag_put(pag); - return error; -} - -static int -xfs_iunlink_remove_inode( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_buf *agibp, - struct xfs_inode *ip) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = agibp->b_addr; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - xfs_agino_t head_agino; - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; - int error; - - trace_xfs_iunlink_remove(ip); - - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the head pointer isn't garbage. - */ - head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (!xfs_verify_agino(pag, head_agino)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - agi, sizeof(*agi)); - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); - return -EFSCORRUPTED; - } - - /* - * Set our inode's next_unlinked pointer to NULL and then return - * the old pointer value so that we can update whatever was previous - * to us in the list to point to whatever was next in the list. - */ - error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); - if (error) - return error; - - /* - * Update the prev pointer in the next inode to point back to previous - * inode in the chain. - */ - error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, - ip->i_next_unlinked); - if (error == -ENOLINK) - error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, - ip->i_next_unlinked); - if (error) - return error; - - if (head_agino != agino) { - struct xfs_inode *prev_ip; - - prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); - if (!prev_ip) { - xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); - return -EFSCORRUPTED; - } - - error = xfs_iunlink_log_inode(tp, prev_ip, pag, - ip->i_next_unlinked); - prev_ip->i_next_unlinked = ip->i_next_unlinked; - } else { - /* Point the head of the list to the next unlinked inode. */ - error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, - ip->i_next_unlinked); - } - - ip->i_next_unlinked = NULLAGINO; - ip->i_prev_unlinked = 0; - return error; -} - -/* - * Pull the on-disk inode from the AGI unlinked list. - */ -int -xfs_iunlink_remove( - struct xfs_trans *tp, - struct xfs_perag *pag, - struct xfs_inode *ip) -{ - struct xfs_buf *agibp; - int error; - - trace_xfs_iunlink_remove(ip); - - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, 0, &agibp); - if (error) - return error; - - return xfs_iunlink_remove_inode(tp, pag, agibp, ip); -} - /* * Look up the inode number specified and if it is not already marked XFS_ISTALE * mark it stale. We should only find clean inodes in this lookup that aren't @@ -2614,36 +1862,10 @@ xfs_ifree( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - /* - * Free the inode first so that we guarantee that the AGI lock is going - * to be taken before we remove the inode from the unlinked list. This - * makes the AGI lock -> unlinked list modification order the same as - * used in O_TMPFILE creation. - */ - error = xfs_difree(tp, pag, ip->i_ino, &xic); - if (error) - goto out; - - error = xfs_iunlink_remove(tp, pag, ip); + error = xfs_inode_uninit(tp, pag, ip, &xic); if (error) goto out; - /* - * Free any local-format data sitting around before we reset the - * data fork to extents format. Note that the attr fork data has - * already been freed by xfs_attr_inactive. - */ - if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { - kfree(ip->i_df.if_data); - ip->i_df.if_data = NULL; - ip->i_df.if_bytes = 0; - } - - VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ - ip->i_diflags = 0; - ip->i_diflags2 = mp->m_ino_geo.new_diflags2; - ip->i_forkoff = 0; /* mark the attr fork not in use */ - ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS)) xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS); @@ -2652,13 +1874,6 @@ xfs_ifree( iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER); spin_unlock(&iip->ili_lock); - /* - * Bump the generation count so no one will be confused - * by reincarnations of this inode. - */ - VFS_I(ip)->i_generation++; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (xic.deleted) error = xfs_ifree_cluster(tp, pag, ip, &xic); out: @@ -2742,13 +1957,17 @@ xfs_remove( struct xfs_name *name, struct xfs_inode *ip) { + struct xfs_dir_update du = { + .dp = dp, + .name = name, + .ip = ip, + }; struct xfs_mount *mp = dp->i_mount; struct xfs_trans *tp = NULL; int is_dir = S_ISDIR(VFS_I(ip)->i_mode); int dontcare; int error = 0; uint resblks; - struct xfs_parent_args *ppargs; trace_xfs_remove(dp, name); @@ -2765,7 +1984,7 @@ xfs_remove( if (error) goto std_return; - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto std_return; @@ -2788,76 +2007,10 @@ xfs_remove( goto out_parent; } - /* - * If we're removing a directory perform some additional validation. - */ - if (is_dir) { - ASSERT(VFS_I(ip)->i_nlink >= 2); - if (VFS_I(ip)->i_nlink != 2) { - error = -ENOTEMPTY; - goto out_trans_cancel; - } - if (!xfs_dir_isempty(ip)) { - error = -ENOTEMPTY; - goto out_trans_cancel; - } - - /* Drop the link from ip's "..". */ - error = xfs_droplink(tp, dp); - if (error) - goto out_trans_cancel; - - /* Drop the "." link from ip to self. */ - error = xfs_droplink(tp, ip); - if (error) - goto out_trans_cancel; - - /* - * Point the unlinked child directory's ".." entry to the root - * directory to eliminate back-references to inodes that may - * get freed before the child directory is closed. If the fs - * gets shrunk, this can lead to dirent inode validation errors. - */ - if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { - error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, - tp->t_mountp->m_sb.sb_rootino, 0); - if (error) - goto out_trans_cancel; - } - } else { - /* - * When removing a non-directory we need to log the parent - * inode here. For a directory this is done implicitly - * by the xfs_droplink call for the ".." entry. - */ - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - /* Drop the link from dp to ip. */ - error = xfs_droplink(tp, ip); + error = xfs_dir_remove_child(tp, resblks, &du); if (error) goto out_trans_cancel; - error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); - if (error) { - ASSERT(error != -ENOENT); - goto out_trans_cancel; - } - - /* Remove parent pointer. */ - if (ppargs) { - error = xfs_parent_removename(tp, ppargs, dp, name, ip); - if (error) - goto out_trans_cancel; - } - - /* - * Drop the link from dp to ip, and if ip was a directory, remove the - * '.' and '..' references since we freed the directory. - */ - xfs_dir_update_hook(dp, ip, -1, name); - /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2875,7 +2028,7 @@ xfs_remove( xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return 0; out_trans_cancel: @@ -2884,7 +2037,7 @@ xfs_remove( xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); std_return: return error; } @@ -2964,160 +2117,6 @@ xfs_sort_inodes( } } -static int -xfs_finish_rename( - struct xfs_trans *tp) -{ - /* - * If this is a synchronous mount, make sure that the rename transaction - * goes to disk before returning to the user. - */ - if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) - xfs_trans_set_sync(tp); - - return xfs_trans_commit(tp); -} - -/* - * xfs_cross_rename() - * - * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall - */ -STATIC int -xfs_cross_rename( - struct xfs_trans *tp, - struct xfs_inode *dp1, - struct xfs_name *name1, - struct xfs_inode *ip1, - struct xfs_parent_args *ip1_ppargs, - struct xfs_inode *dp2, - struct xfs_name *name2, - struct xfs_inode *ip2, - struct xfs_parent_args *ip2_ppargs, - int spaceres) -{ - int error = 0; - int ip1_flags = 0; - int ip2_flags = 0; - int dp2_flags = 0; - - /* Swap inode number for dirent in first parent */ - error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); - if (error) - goto out_trans_abort; - - /* Swap inode number for dirent in second parent */ - error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); - if (error) - goto out_trans_abort; - - /* - * If we're renaming one or more directories across different parents, - * update the respective ".." entries (and link counts) to match the new - * parents. - */ - if (dp1 != dp2) { - dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; - - if (S_ISDIR(VFS_I(ip2)->i_mode)) { - error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, - dp1->i_ino, spaceres); - if (error) - goto out_trans_abort; - - /* transfer ip2 ".." reference to dp1 */ - if (!S_ISDIR(VFS_I(ip1)->i_mode)) { - error = xfs_droplink(tp, dp2); - if (error) - goto out_trans_abort; - xfs_bumplink(tp, dp1); - } - - /* - * Although ip1 isn't changed here, userspace needs - * to be warned about the change, so that applications - * relying on it (like backup ones), will properly - * notify the change - */ - ip1_flags |= XFS_ICHGTIME_CHG; - ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; - } - - if (S_ISDIR(VFS_I(ip1)->i_mode)) { - error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, - dp2->i_ino, spaceres); - if (error) - goto out_trans_abort; - - /* transfer ip1 ".." reference to dp2 */ - if (!S_ISDIR(VFS_I(ip2)->i_mode)) { - error = xfs_droplink(tp, dp1); - if (error) - goto out_trans_abort; - xfs_bumplink(tp, dp2); - } - - /* - * Although ip2 isn't changed here, userspace needs - * to be warned about the change, so that applications - * relying on it (like backup ones), will properly - * notify the change - */ - ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; - ip2_flags |= XFS_ICHGTIME_CHG; - } - } - - /* Schedule parent pointer replacements */ - if (ip1_ppargs) { - error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2, - name2, ip1); - if (error) - goto out_trans_abort; - } - - if (ip2_ppargs) { - error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1, - name1, ip2); - if (error) - goto out_trans_abort; - } - - if (ip1_flags) { - xfs_trans_ichgtime(tp, ip1, ip1_flags); - xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); - } - if (ip2_flags) { - xfs_trans_ichgtime(tp, ip2, ip2_flags); - xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); - } - if (dp2_flags) { - xfs_trans_ichgtime(tp, dp2, dp2_flags); - xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); - } - xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); - - /* - * Inform our hook clients that we've finished an exchange operation as - * follows: removed the source and target files from their directories; - * added the target to the source directory; and added the source to - * the target directory. All inodes are locked, so it's ok to model a - * rename this way so long as we say we deleted entries before we add - * new ones. - */ - xfs_dir_update_hook(dp1, ip1, -1, name1); - xfs_dir_update_hook(dp2, ip2, -1, name2); - xfs_dir_update_hook(dp1, ip2, 1, name1); - xfs_dir_update_hook(dp2, ip1, 1, name2); - - return xfs_finish_rename(tp); - -out_trans_abort: - xfs_trans_cancel(tp); - return error; -} - /* * xfs_rename_alloc_whiteout() * @@ -3133,12 +2132,17 @@ xfs_rename_alloc_whiteout( struct xfs_inode *dp, struct xfs_inode **wip) { + struct xfs_icreate_args args = { + .idmap = idmap, + .pip = dp, + .mode = S_IFCHR | WHITEOUT_MODE, + .flags = XFS_ICREATE_TMPFILE, + }; struct xfs_inode *tmpfile; struct qstr name; int error; - error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, - xfs_has_parent(dp->i_mount), &tmpfile); + error = xfs_create_tmpfile(&args, &tmpfile); if (error) return error; @@ -3178,13 +2182,20 @@ xfs_rename( struct xfs_inode *target_ip, unsigned int flags) { + struct xfs_dir_update du_src = { + .dp = src_dp, + .name = src_name, + .ip = src_ip, + }; + struct xfs_dir_update du_tgt = { + .dp = target_dp, + .name = target_name, + .ip = target_ip, + }; + struct xfs_dir_update du_wip = { }; struct xfs_mount *mp = src_dp->i_mount; struct xfs_trans *tp; - struct xfs_inode *wip = NULL; /* whiteout inode */ struct xfs_inode *inodes[__XFS_SORT_INODES]; - struct xfs_parent_args *src_ppargs = NULL; - struct xfs_parent_args *tgt_ppargs = NULL; - struct xfs_parent_args *wip_ppargs = NULL; int i; int num_inodes = __XFS_SORT_INODES; bool new_parent = (src_dp != target_dp); @@ -3204,8 +2215,8 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - error = xfs_rename_alloc_whiteout(idmap, src_name, - target_dp, &wip); + error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp, + &du_wip.ip); if (error) return error; @@ -3213,21 +2224,21 @@ xfs_rename( src_name->type = XFS_DIR3_FT_CHRDEV; } - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, - inodes, &num_inodes); + xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, du_wip.ip, + inodes, &num_inodes); - error = xfs_parent_start(mp, &src_ppargs); + error = xfs_parent_start(mp, &du_src.ppargs); if (error) goto out_release_wip; - if (wip) { - error = xfs_parent_start(mp, &wip_ppargs); + if (du_wip.ip) { + error = xfs_parent_start(mp, &du_wip.ppargs); if (error) goto out_src_ppargs; } if (target_ip) { - error = xfs_parent_start(mp, &tgt_ppargs); + error = xfs_parent_start(mp, &du_tgt.ppargs); if (error) goto out_wip_ppargs; } @@ -3235,7 +2246,7 @@ xfs_rename( retry: nospace_error = 0; spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL, - target_name->len, wip != NULL); + target_name->len, du_wip.ip != NULL); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); if (error == -ENOSPC) { nospace_error = error; @@ -3250,7 +2261,7 @@ retry: * We don't allow reservationless renaming when parent pointers are * enabled because we can't back out if the xattrs must grow. */ - if (src_ppargs && nospace_error) { + if (du_src.ppargs && nospace_error) { error = nospace_error; xfs_trans_cancel(tp); goto out_tgt_ppargs; @@ -3282,8 +2293,8 @@ retry: xfs_trans_ijoin(tp, src_ip, 0); if (target_ip) xfs_trans_ijoin(tp, target_ip, 0); - if (wip) - xfs_trans_ijoin(tp, wip, 0); + if (du_wip.ip) + xfs_trans_ijoin(tp, du_wip.ip, 0); /* * If we are using project inheritance, we only allow renames @@ -3298,11 +2309,11 @@ retry: /* RENAME_EXCHANGE is unique from here on. */ if (flags & RENAME_EXCHANGE) { - error = xfs_cross_rename(tp, src_dp, src_name, src_ip, - src_ppargs, target_dp, target_name, target_ip, - tgt_ppargs, spaceres); - nospace_error = 0; - goto out_unlock; + error = xfs_dir_exchange_children(tp, &du_src, &du_tgt, + spaceres); + if (error) + goto out_trans_cancel; + goto out_commit; } /* @@ -3335,39 +2346,12 @@ retry: * We don't allow quotaless renaming when parent pointers are enabled * because we can't back out if the xattrs must grow. */ - if (src_ppargs && nospace_error) { + if (du_src.ppargs && nospace_error) { error = nospace_error; goto out_trans_cancel; } /* - * Check for expected errors before we dirty the transaction - * so we can return an error without a transaction abort. - */ - if (target_ip == NULL) { - /* - * If there's no space reservation, check the entry will - * fit before actually inserting it. - */ - if (!spaceres) { - error = xfs_dir_canenter(tp, target_dp, target_name); - if (error) - goto out_trans_cancel; - } - } else { - /* - * If target exists and it's a directory, check that whether - * it can be destroyed. - */ - if (S_ISDIR(VFS_I(target_ip)->i_mode) && - (!xfs_dir_isempty(target_ip) || - (VFS_I(target_ip)->i_nlink > 2))) { - error = -EEXIST; - goto out_trans_cancel; - } - } - - /* * Lock the AGI buffers we need to handle bumping the nlink of the * whiteout inode off the unlinked list and to handle dropping the * nlink of the target inode. Per locking order rules, do this in @@ -3378,7 +2362,7 @@ retry: * target_ip is either null or an empty directory. */ for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { - if (inodes[i] == wip || + if (inodes[i] == du_wip.ip || (inodes[i] == target_ip && (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { struct xfs_perag *pag; @@ -3393,188 +2377,29 @@ retry: } } - /* - * Directory entry creation below may acquire the AGF. Remove - * the whiteout from the unlinked list first to preserve correct - * AGI/AGF locking order. This dirties the transaction so failures - * after this point will abort and log recovery will clean up the - * mess. - * - * For whiteouts, we need to bump the link count on the whiteout - * inode. After this point, we have a real link, clear the tmpfile - * state flag from the inode so it doesn't accidentally get misused - * in future. - */ - if (wip) { - struct xfs_perag *pag; - - ASSERT(VFS_I(wip)->i_nlink == 0); - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino)); - error = xfs_iunlink_remove(tp, pag, wip); - xfs_perag_put(pag); - if (error) - goto out_trans_cancel; - - xfs_bumplink(tp, wip); - VFS_I(wip)->i_state &= ~I_LINKABLE; - } - - /* - * Set up the target. - */ - if (target_ip == NULL) { - /* - * If target does not exist and the rename crosses - * directories, adjust the target directory link count - * to account for the ".." reference from the new entry. - */ - error = xfs_dir_createname(tp, target_dp, target_name, - src_ip->i_ino, spaceres); - if (error) - goto out_trans_cancel; - - xfs_trans_ichgtime(tp, target_dp, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - if (new_parent && src_is_directory) { - xfs_bumplink(tp, target_dp); - } - } else { /* target_ip != NULL */ - /* - * Link the source inode under the target name. - * If the source inode is a directory and we are moving - * it across directories, its ".." entry will be - * inconsistent until we replace that down below. - * - * In case there is already an entry with the same - * name at the destination directory, remove it first. - */ - error = xfs_dir_replace(tp, target_dp, target_name, - src_ip->i_ino, spaceres); - if (error) - goto out_trans_cancel; - - xfs_trans_ichgtime(tp, target_dp, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - /* - * Decrement the link count on the target since the target - * dir no longer points to it. - */ - error = xfs_droplink(tp, target_ip); - if (error) - goto out_trans_cancel; - - if (src_is_directory) { - /* - * Drop the link from the old "." entry. - */ - error = xfs_droplink(tp, target_ip); - if (error) - goto out_trans_cancel; - } - } /* target_ip != NULL */ - - /* - * Remove the source. - */ - if (new_parent && src_is_directory) { - /* - * Rewrite the ".." entry to point to the new - * directory. - */ - error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, - target_dp->i_ino, spaceres); - ASSERT(error != -EEXIST); - if (error) - goto out_trans_cancel; - } - - /* - * We always want to hit the ctime on the source inode. - * - * This isn't strictly required by the standards since the source - * inode isn't really being changed, but old unix file systems did - * it and some incremental backup programs won't work without it. - */ - xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); - - /* - * Adjust the link count on src_dp. This is necessary when - * renaming a directory, either within one parent when - * the target existed, or across two parent directories. - */ - if (src_is_directory && (new_parent || target_ip != NULL)) { - - /* - * Decrement link count on src_directory since the - * entry that's moved no longer points to it. - */ - error = xfs_droplink(tp, src_dp); - if (error) - goto out_trans_cancel; - } - - /* - * For whiteouts, we only need to update the source dirent with the - * inode number of the whiteout inode rather than removing it - * altogether. - */ - if (wip) - error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, - spaceres); - else - error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, - spaceres); - + error = xfs_dir_rename_children(tp, &du_src, &du_tgt, spaceres, + &du_wip); if (error) goto out_trans_cancel; - /* Schedule parent pointer updates. */ - if (wip_ppargs) { - error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name, - wip); - if (error) - goto out_trans_cancel; - } - - if (src_ppargs) { - error = xfs_parent_replacename(tp, src_ppargs, src_dp, - src_name, target_dp, target_name, src_ip); - if (error) - goto out_trans_cancel; - } - - if (tgt_ppargs) { - error = xfs_parent_removename(tp, tgt_ppargs, target_dp, - target_name, target_ip); - if (error) - goto out_trans_cancel; + if (du_wip.ip) { + /* + * Now we have a real link, clear the "I'm a tmpfile" state + * flag from the inode so it doesn't accidentally get misused in + * future. + */ + VFS_I(du_wip.ip)->i_state &= ~I_LINKABLE; } - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - if (new_parent) - xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - +out_commit: /* - * Inform our hook clients that we've finished a rename operation as - * follows: removed the source and target files from their directories; - * that we've added the source to the target directory; and finally - * that we've added the whiteout, if there was one. All inodes are - * locked, so it's ok to model a rename this way so long as we say we - * deleted entries before we add new ones. + * If this is a synchronous mount, make sure that the rename + * transaction goes to disk before returning to the user. */ - if (target_ip) - xfs_dir_update_hook(target_dp, target_ip, -1, target_name); - xfs_dir_update_hook(src_dp, src_ip, -1, src_name); - xfs_dir_update_hook(target_dp, src_ip, 1, target_name); - if (wip) - xfs_dir_update_hook(src_dp, wip, 1, src_name); + if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) + xfs_trans_set_sync(tp); - error = xfs_finish_rename(tp); + error = xfs_trans_commit(tp); nospace_error = 0; goto out_unlock; @@ -3583,14 +2408,14 @@ out_trans_cancel: out_unlock: xfs_iunlock_rename(inodes, num_inodes); out_tgt_ppargs: - xfs_parent_finish(mp, tgt_ppargs); + xfs_parent_finish(mp, du_tgt.ppargs); out_wip_ppargs: - xfs_parent_finish(mp, wip_ppargs); + xfs_parent_finish(mp, du_wip.ppargs); out_src_ppargs: - xfs_parent_finish(mp, src_ppargs); + xfs_parent_finish(mp, du_src.ppargs); out_release_wip: - if (wip) - xfs_irele(wip); + if (du_wip.ip) + xfs_irele(du_wip.ip); if (error == -ENOSPC && nospace_error) error = nospace_error; return error; @@ -4293,3 +3118,11 @@ xfs_inode_alloc_unitsize( return XFS_FSB_TO_B(ip->i_mount, blocks); } + +/* Should we always be using copy on write for file writes? */ +bool +xfs_is_always_cow_inode( + struct xfs_inode *ip) +{ + return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 292b90b5f2ac..51defdebef30 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -8,6 +8,7 @@ #include "xfs_inode_buf.h" #include "xfs_inode_fork.h" +#include "xfs_inode_util.h" /* * Kernel only inode definitions @@ -270,15 +271,6 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags) return ret; } -static inline prid_t -xfs_get_initial_prid(struct xfs_inode *dp) -{ - if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) - return dp->i_projid; - - return XFS_PROJID_DEFAULT; -} - static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; @@ -292,6 +284,13 @@ static inline bool xfs_is_metadata_inode(struct xfs_inode *ip) xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } +bool xfs_is_always_cow_inode(struct xfs_inode *ip); + +static inline bool xfs_is_cow_inode(struct xfs_inode *ip) +{ + return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); +} + /* * Check if an inode has any data in the COW fork. This might be often false * even for inodes with the reflink flag when there is no pending COW operation. @@ -517,12 +516,9 @@ int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); -int xfs_create(struct mnt_idmap *idmap, - struct xfs_inode *dp, struct xfs_name *name, - umode_t mode, dev_t rdev, bool need_xattr, - struct xfs_inode **ipp); -int xfs_create_tmpfile(struct mnt_idmap *idmap, - struct xfs_inode *dp, umode_t mode, bool init_xattrs, +int xfs_create(const struct xfs_icreate_args *iargs, + struct xfs_name *name, struct xfs_inode **ipp); +int xfs_create_tmpfile(const struct xfs_icreate_args *iargs, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); @@ -542,7 +538,6 @@ void xfs_assert_ilocked(struct xfs_inode *, uint); uint xfs_ilock_data_map_shared(struct xfs_inode *); uint xfs_ilock_attr_map_shared(struct xfs_inode *); -uint xfs_ip2xflags(struct xfs_inode *); int xfs_ifree(struct xfs_trans *, struct xfs_inode *); int xfs_itruncate_extents_flags(struct xfs_trans **, struct xfs_inode *, int, xfs_fsize_t, int); @@ -556,13 +551,8 @@ int xfs_iflush_cluster(struct xfs_buf *); void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, struct xfs_inode *ip1, uint ip1_mode); -xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); -xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); - -int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp, - struct xfs_inode *pip, xfs_ino_t ino, umode_t mode, - xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs, - struct xfs_inode **ipp); +int xfs_icreate(struct xfs_trans *tp, xfs_ino_t ino, + const struct xfs_icreate_args *args, struct xfs_inode **ipp); static inline int xfs_itruncate_extents( @@ -616,18 +606,15 @@ extern struct kmem_cache *xfs_inode_cache; bool xfs_inode_needs_inactive(struct xfs_inode *ip); -int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip); -int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, - struct xfs_inode *ip); struct xfs_inode *xfs_iunlink_lookup(struct xfs_perag *pag, xfs_agino_t agino); +int xfs_iunlink_reload_next(struct xfs_trans *tp, struct xfs_buf *agibp, + xfs_agino_t prev_agino, xfs_agino_t next_agino); void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); -int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip); -void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode); void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); @@ -645,29 +632,8 @@ void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, xfs_filblks_t *dblocks, xfs_filblks_t *rblocks); unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip); -struct xfs_dir_update_params { - const struct xfs_inode *dp; - const struct xfs_inode *ip; - const struct xfs_name *name; - int delta; -}; - -#ifdef CONFIG_XFS_LIVE_HOOKS -void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip, - int delta, const struct xfs_name *name); - -struct xfs_dir_hook { - struct xfs_hook dirent_hook; -}; - -void xfs_dir_hook_disable(void); -void xfs_dir_hook_enable(void); - -int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook); -void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook); -void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn); -#else -# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) -#endif /* CONFIG_XFS_LIVE_HOOKS */ +int xfs_icreate_dqalloc(const struct xfs_icreate_args *args, + struct xfs_dquot **udqpp, struct xfs_dquot **gdqpp, + struct xfs_dquot **pdqpp); #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f28d653300d1..ef05cbbe116c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -37,6 +37,36 @@ xfs_inode_item_sort( return INODE_ITEM(lip)->ili_inode->i_ino; } +#ifdef DEBUG_EXPENSIVE +static void +xfs_inode_item_precommit_check( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dinode *dip; + xfs_failaddr_t fa; + + dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS); + if (!dip) { + ASSERT(dip != NULL); + return; + } + + xfs_inode_to_disk(ip, dip, 0); + xfs_dinode_calc_crc(mp, dip); + fa = xfs_dinode_verify(mp, ip->i_ino, dip); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, + sizeof(*dip), fa); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + ASSERT(fa == NULL); + } + kfree(dip); +} +#else +# define xfs_inode_item_precommit_check(ip) ((void)0) +#endif + /* * Prior to finally logging the inode, we have to ensure that all the * per-modification inode state changes are applied. This includes VFS inode @@ -169,6 +199,8 @@ xfs_inode_item_precommit( iip->ili_fields |= (flags | iip->ili_last_fields); spin_unlock(&iip->ili_lock); + xfs_inode_item_precommit_check(ip); + /* * We are done with the log item transaction dirty state, so clear it so * that it doesn't pollute future transactions. diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index f0117188f302..4e933db75b12 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -469,66 +469,6 @@ xfs_fileattr_get( return 0; } -STATIC uint16_t -xfs_flags2diflags( - struct xfs_inode *ip, - unsigned int xflags) -{ - /* can't set PREALLOC this way, just preserve it */ - uint16_t di_flags = - (ip->i_diflags & XFS_DIFLAG_PREALLOC); - - if (xflags & FS_XFLAG_IMMUTABLE) - di_flags |= XFS_DIFLAG_IMMUTABLE; - if (xflags & FS_XFLAG_APPEND) - di_flags |= XFS_DIFLAG_APPEND; - if (xflags & FS_XFLAG_SYNC) - di_flags |= XFS_DIFLAG_SYNC; - if (xflags & FS_XFLAG_NOATIME) - di_flags |= XFS_DIFLAG_NOATIME; - if (xflags & FS_XFLAG_NODUMP) - di_flags |= XFS_DIFLAG_NODUMP; - if (xflags & FS_XFLAG_NODEFRAG) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (xflags & FS_XFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - if (S_ISDIR(VFS_I(ip)->i_mode)) { - if (xflags & FS_XFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (xflags & FS_XFLAG_NOSYMLINKS) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (xflags & FS_XFLAG_EXTSZINHERIT) - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - if (xflags & FS_XFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - } else if (S_ISREG(VFS_I(ip)->i_mode)) { - if (xflags & FS_XFLAG_REALTIME) - di_flags |= XFS_DIFLAG_REALTIME; - if (xflags & FS_XFLAG_EXTSIZE) - di_flags |= XFS_DIFLAG_EXTSIZE; - } - - return di_flags; -} - -STATIC uint64_t -xfs_flags2diflags2( - struct xfs_inode *ip, - unsigned int xflags) -{ - uint64_t di_flags2 = - (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | - XFS_DIFLAG2_BIGTIME | - XFS_DIFLAG2_NREXT64)); - - if (xflags & FS_XFLAG_DAX) - di_flags2 |= XFS_DIFLAG2_DAX; - if (xflags & FS_XFLAG_COWEXTSIZE) - di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; - - return di_flags2; -} - static int xfs_ioctl_setattr_xflags( struct xfs_trans *tp, diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ff222827e550..07f736c42460 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -26,6 +26,7 @@ #include "xfs_ioctl.h" #include "xfs_xattr.h" #include "xfs_file.h" +#include "xfs_bmap.h" #include <linux/posix_acl.h> #include <linux/security.h> @@ -157,8 +158,6 @@ xfs_create_need_xattr( if (dir->i_sb->s_security) return true; #endif - if (xfs_has_parent(XFS_I(dir)->i_mount)) - return true; return false; } @@ -172,49 +171,55 @@ xfs_generic_create( dev_t rdev, struct file *tmpfile) /* unnamed file */ { - struct inode *inode; - struct xfs_inode *ip = NULL; - struct posix_acl *default_acl, *acl; - struct xfs_name name; - int error; + struct xfs_icreate_args args = { + .idmap = idmap, + .pip = XFS_I(dir), + .rdev = rdev, + .mode = mode, + }; + struct inode *inode; + struct xfs_inode *ip = NULL; + struct posix_acl *default_acl, *acl; + struct xfs_name name; + int error; /* * Irix uses Missed'em'V split, but doesn't want to see * the upper 5 bits of (14bit) major. */ - if (S_ISCHR(mode) || S_ISBLK(mode)) { - if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) + if (S_ISCHR(args.mode) || S_ISBLK(args.mode)) { + if (unlikely(!sysv_valid_dev(args.rdev) || + MAJOR(args.rdev) & ~0x1ff)) return -EINVAL; } else { - rdev = 0; + args.rdev = 0; } - error = posix_acl_create(dir, &mode, &default_acl, &acl); + error = posix_acl_create(dir, &args.mode, &default_acl, &acl); if (error) return error; /* Verify mode is valid also for tmpfile case */ - error = xfs_dentry_mode_to_name(&name, dentry, mode); + error = xfs_dentry_mode_to_name(&name, dentry, args.mode); if (unlikely(error)) goto out_free_acl; if (!tmpfile) { - error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev, - xfs_create_need_xattr(dir, default_acl, acl), - &ip); + if (xfs_create_need_xattr(dir, default_acl, acl)) + args.flags |= XFS_ICREATE_INIT_XATTRS; + + error = xfs_create(&args, &name, &ip); } else { - bool init_xattrs = false; + args.flags |= XFS_ICREATE_TMPFILE; /* - * If this temporary file will be linkable, set up the file - * with an attr fork to receive a parent pointer. + * If this temporary file will not be linkable, don't bother + * creating an attr fork to receive a parent pointer. */ - if (!(tmpfile->f_flags & O_EXCL) && - xfs_has_parent(XFS_I(dir)->i_mount)) - init_xattrs = true; + if (tmpfile->f_flags & O_EXCL) + args.flags |= XFS_ICREATE_UNLINKABLE; - error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, - init_xattrs, &ip); + error = xfs_create_tmpfile(&args, &ip); } if (unlikely(error)) goto out_free_acl; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index ac355328121a..54a098fe7285 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -135,8 +135,6 @@ typedef __u32 xfs_nlink_t; */ #define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) -#define XFS_PROJID_DEFAULT 0 - #define howmany(x, y) (((x)+((y)-1))/(y)) static inline void delay(long ticks) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 47120b745c47..9490b913a4ab 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -793,12 +793,15 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { + struct xfs_icreate_args args = { + .mode = S_IFREG, + .flags = XFS_ICREATE_UNLINKABLE, + }; xfs_ino_t ino; error = xfs_dialloc(&tp, 0, S_IFREG, &ino); if (!error) - error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino, - S_IFREG, 1, 0, 0, false, ipp); + error = xfs_icreate(tp, ino, &args, ipp); if (error) { xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 65c5dfe17ecf..fb55e4ce49fa 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -6,16 +6,6 @@ #ifndef __XFS_REFLINK_H #define __XFS_REFLINK_H 1 -static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip) -{ - return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); -} - -static inline bool xfs_is_cow_inode(struct xfs_inode *ip) -{ - return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); -} - extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared); int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 17aee806ec2e..77f19e2f66e0 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -90,19 +90,25 @@ xfs_symlink( struct xfs_inode **ipp) { struct xfs_mount *mp = dp->i_mount; + struct xfs_icreate_args args = { + .idmap = idmap, + .pip = dp, + .mode = S_IFLNK | (mode & ~S_IFMT), + }; + struct xfs_dir_update du = { + .dp = dp, + .name = link_name, + }; struct xfs_trans *tp = NULL; - struct xfs_inode *ip = NULL; int error = 0; int pathlen; bool unlock_dp_on_error = false; xfs_filblks_t fs_blocks; - prid_t prid; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; + struct xfs_dquot *udqp; + struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; uint resblks; xfs_ino_t ino; - struct xfs_parent_args *ppargs; *ipp = NULL; @@ -119,15 +125,8 @@ xfs_symlink( return -ENAMETOOLONG; ASSERT(pathlen > 0); - prid = xfs_get_initial_prid(dp); - - /* - * Make sure that we have allocated dquot(s) on disk. - */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), - mapped_fsgid(idmap, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + /* Make sure that we have allocated dquot(s) on disk. */ + error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); if (error) return error; @@ -143,7 +142,7 @@ xfs_symlink( fs_blocks = xfs_symlink_blocks(mp, pathlen); resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks); - error = xfs_parent_start(mp, &ppargs); + error = xfs_parent_start(mp, &du.ppargs); if (error) goto out_release_dquots; @@ -168,9 +167,7 @@ xfs_symlink( */ error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); if (!error) - error = xfs_init_new_inode(idmap, tp, dp, ino, - S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, - xfs_has_parent(mp), &ip); + error = xfs_icreate(tp, ino, &args, &du.ip); if (error) goto out_trans_cancel; @@ -186,33 +183,22 @@ xfs_symlink( /* * Also attach the dquot(s) to it, if applicable. */ - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); + xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp); resblks -= XFS_IALLOC_SPACE_RES(mp); - error = xfs_symlink_write_target(tp, ip, ip->i_ino, target_path, + error = xfs_symlink_write_target(tp, du.ip, du.ip->i_ino, target_path, pathlen, fs_blocks, resblks); if (error) goto out_trans_cancel; resblks -= fs_blocks; - i_size_write(VFS_I(ip), ip->i_disk_size); + i_size_write(VFS_I(du.ip), du.ip->i_disk_size); /* * Create the directory entry for the symlink. */ - error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks); + error = xfs_dir_create_child(tp, resblks, &du); if (error) goto out_trans_cancel; - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - - /* Add parent pointer for the new symlink. */ - if (ppargs) { - error = xfs_parent_addname(tp, ppargs, dp, link_name, ip); - if (error) - goto out_trans_cancel; - } - - xfs_dir_update_hook(dp, ip, 1, link_name); /* * If this is a synchronous mount, make sure that the @@ -230,10 +216,10 @@ xfs_symlink( xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); - *ipp = ip; - xfs_iunlock(ip, XFS_ILOCK_EXCL); + *ipp = du.ip; + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); return 0; out_trans_cancel: @@ -244,13 +230,13 @@ out_release_inode: * setup of the inode and release the inode. This prevents recursive * transactions and deadlocks from xfs_inactive. */ - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_finish_inode_setup(ip); - xfs_irele(ip); + if (du.ip) { + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); + xfs_finish_inode_setup(du.ip); + xfs_irele(du.ip); } out_parent: - xfs_parent_finish(mp, ppargs); + xfs_parent_finish(mp, du.ppargs); out_release_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1636663707dc..f97e8c68641f 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -224,7 +224,6 @@ void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); -void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint, uint); |