aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Kconfig12
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c43
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c661
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h49
-rw-r--r--fs/xfs/libxfs/xfs_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c749
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.h62
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h1
-rw-r--r--fs/xfs/libxfs/xfs_shared.h7
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/scrub/common.c1
-rw-r--r--fs/xfs/scrub/tempfile.c21
-rw-r--r--fs/xfs/xfs.h4
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_dquot_item.c31
-rw-r--r--fs/xfs/xfs_inode.c1487
-rw-r--r--fs/xfs/xfs_inode.h70
-rw-r--r--fs/xfs/xfs_inode_item.c32
-rw-r--r--fs/xfs/xfs_ioctl.c60
-rw-r--r--fs/xfs/xfs_iops.c51
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_reflink.h10
-rw-r--r--fs/xfs/xfs_symlink.c70
-rw-r--r--fs/xfs/xfs_trans.h1
28 files changed, 1948 insertions, 1545 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index d41edd30388b..fffd6fffdce0 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -217,6 +217,18 @@ config XFS_DEBUG
Say N unless you are an XFS developer, or you play one on TV.
+config XFS_DEBUG_EXPENSIVE
+ bool "XFS expensive debugging checks"
+ depends on XFS_FS && XFS_DEBUG
+ help
+ Say Y here to get an XFS build with expensive debugging checks
+ enabled. These checks may affect performance significantly.
+
+ Note that the resulting code will be HUGER and SLOWER, and probably
+ not useful unless you are debugging a particular problem.
+
+ Say N unless you are an XFS developer, or you play one on TV.
+
config XFS_ASSERT_FATAL
bool "XFS fatal asserts"
default y
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c50447548d65..dd692619bed5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,6 +40,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_iext_tree.o \
xfs_inode_fork.o \
xfs_inode_buf.o \
+ xfs_inode_util.o \
xfs_log_rlimit.o \
xfs_ag_resv.o \
xfs_parent.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6af6f744fdd6..09e3302a4b72 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -39,6 +39,7 @@
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
+#include "xfs_inode_util.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -6454,3 +6455,45 @@ xfs_bmap_query_all(
return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
}
+
+/* Helper function to extract extent size hint from inode */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ /*
+ * No point in aligning allocations if we need to COW to actually
+ * write to them.
+ */
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
+ if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
+ return ip->i_extsize;
+ if (XFS_IS_REALTIME_INODE(ip) &&
+ ip->i_mount->m_sb.sb_rextsize > 1)
+ return ip->i_mount->m_sb.sb_rextsize;
+ return 0;
+}
+
+/*
+ * Helper function to extract CoW extent size hint from inode.
+ * Between the extent size hint and the CoW extent size hint, we
+ * return the greater of the two. If the value is zero (automatic),
+ * use the default size.
+ */
+xfs_extlen_t
+xfs_get_cowextsz_hint(
+ struct xfs_inode *ip)
+{
+ xfs_extlen_t a, b;
+
+ a = 0;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ a = ip->i_cowextsize;
+ b = xfs_get_extsz_hint(ip);
+
+ a = max(a, b);
+ if (a == 0)
+ return XFS_DEFAULT_COWEXTSZ_HINT;
+ return a;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 667b0c2b33d1..7592d46e97c6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -296,4 +296,7 @@ typedef int (*xfs_bmap_query_range_fn)(
int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
void *priv);
+xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
+xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 457f9a38f850..202468223bf9 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -19,6 +19,11 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_health.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_parent.h"
+#include "xfs_ag.h"
+#include "xfs_ialloc.h"
const struct xfs_name xfs_name_dotdot = {
.name = (const unsigned char *)"..",
@@ -584,9 +589,9 @@ xfs_dir_replace(
*/
int
xfs_dir_canenter(
- xfs_trans_t *tp,
- xfs_inode_t *dp,
- struct xfs_name *name) /* name of entry to add */
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const struct xfs_name *name) /* name of entry to add */
{
return xfs_dir_createname(tp, dp, name, 0, 0);
}
@@ -756,3 +761,653 @@ xfs_dir2_compname(
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of directory live update hooks.
+ * If the compiler supports jump labels, the static branch will be replaced by
+ * a nop sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
+
+void
+xfs_dir_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_dir_hooks_switch);
+}
+
+void
+xfs_dir_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_dir_hooks_switch);
+}
+
+/* Call hooks for a directory update relating to a child dirent update. */
+inline void
+xfs_dir_update_hook(
+ struct xfs_inode *dp,
+ struct xfs_inode *ip,
+ int delta,
+ const struct xfs_name *name)
+{
+ if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
+ struct xfs_dir_update_params p = {
+ .dp = dp,
+ .ip = ip,
+ .delta = delta,
+ .name = name,
+ };
+ struct xfs_mount *mp = ip->i_mount;
+
+ xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
+ }
+}
+
+/* Call the specified function during a directory update. */
+int
+xfs_dir_hook_add(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Stop calling the specified function during a directory update. */
+void
+xfs_dir_hook_del(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Configure directory update hook functions. */
+void
+xfs_dir_hook_setup(
+ struct xfs_dir_hook *hook,
+ notifier_fn_t mod_fn)
+{
+ xfs_hook_setup(&hook->dirent_hook, mod_fn);
+}
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+/*
+ * Given a directory @dp, a newly allocated inode @ip, and a @name, link @ip
+ * into @dp under the given @name. If @ip is a directory, it will be
+ * initialized. Both inodes must have the ILOCK held and the transaction must
+ * have sufficient blocks reserved.
+ */
+int
+xfs_dir_create_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOSPC);
+ return error;
+ }
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ error = xfs_dir_init(tp, ip, dp);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, dp);
+ }
+
+ /*
+ * If we have parent pointers, we need to add the attribute containing
+ * the parent information now.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, an existing non-directory inode @ip, and a @name,
+ * link @ip into @dp under the given @name. Both inodes must have the ILOCK
+ * held.
+ */
+int
+xfs_dir_add_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ struct xfs_mount *mp = tp->t_mountp;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+ ASSERT(!S_ISDIR(VFS_I(ip)->i_mode));
+
+ if (!resblks) {
+ error = xfs_dir_canenter(tp, dp, name);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Handle initial link state of O_TMPFILE inode
+ */
+ if (VFS_I(ip)->i_nlink == 0) {
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+ }
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ xfs_bumplink(tp, ip);
+
+ /*
+ * If we have parent pointers, we now need to add the parent record to
+ * the attribute fork of the inode. If this is the initial parent
+ * attribute, we need to create it correctly, otherwise we can just add
+ * the parent to the inode.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, a child @ip, and a @name, remove the (@name, @ip)
+ * entry from the directory. Both inodes must have the ILOCK held.
+ */
+int
+xfs_dir_remove_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're removing a directory perform some additional validation.
+ */
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ ASSERT(VFS_I(ip)->i_nlink >= 2);
+ if (VFS_I(ip)->i_nlink != 2)
+ return -ENOTEMPTY;
+ if (!xfs_dir_isempty(ip))
+ return -ENOTEMPTY;
+
+ /* Drop the link from ip's "..". */
+ error = xfs_droplink(tp, dp);
+ if (error)
+ return error;
+
+ /* Drop the "." link from ip to self. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * When removing a non-directory we need to log the parent
+ * inode here. For a directory this is done implicitly
+ * by the xfs_droplink call for the ".." entry.
+ */
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /* Drop the link from dp to ip. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOENT);
+ return error;
+ }
+
+ /* Remove parent pointer. */
+ if (du->ppargs) {
+ error = xfs_parent_removename(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, -1, name);
+ return 0;
+}
+
+/*
+ * Exchange the entry (@name1, @ip1) in directory @dp1 with the entry (@name2,
+ * @ip2) in directory @dp2, and update '..' @ip1 and @ip2's entries as needed.
+ * @ip1 and @ip2 need not be of the same type.
+ *
+ * All inodes must have the ILOCK held, and both entries must already exist.
+ */
+int
+xfs_dir_exchange_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2,
+ unsigned int spaceres)
+{
+ struct xfs_inode *dp1 = du1->dp;
+ const struct xfs_name *name1 = du1->name;
+ struct xfs_inode *ip1 = du1->ip;
+ struct xfs_inode *dp2 = du2->dp;
+ const struct xfs_name *name2 = du2->name;
+ struct xfs_inode *ip2 = du2->ip;
+ int ip1_flags = 0;
+ int ip2_flags = 0;
+ int dp2_flags = 0;
+ int error;
+
+ /* Swap inode number for dirent in first parent */
+ error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* Swap inode number for dirent in second parent */
+ error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /*
+ * If we're renaming one or more directories across different parents,
+ * update the respective ".." entries (and link counts) to match the new
+ * parents.
+ */
+ if (dp1 != dp2) {
+ dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+
+ if (S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
+ dp1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip2 ".." reference to dp1 */
+ if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_droplink(tp, dp2);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp1);
+ }
+
+ /*
+ * Although ip1 isn't changed here, userspace needs
+ * to be warned about the change, so that applications
+ * relying on it (like backup ones), will properly
+ * notify the change
+ */
+ ip1_flags |= XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ }
+
+ if (S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
+ dp2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip1 ".." reference to dp2 */
+ if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_droplink(tp, dp1);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp2);
+ }
+
+ /*
+ * Although ip2 isn't changed here, userspace needs
+ * to be warned about the change, so that applications
+ * relying on it (like backup ones), will properly
+ * notify the change
+ */
+ ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_CHG;
+ }
+ }
+
+ if (ip1_flags) {
+ xfs_trans_ichgtime(tp, ip1, ip1_flags);
+ xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
+ }
+ if (ip2_flags) {
+ xfs_trans_ichgtime(tp, ip2, ip2_flags);
+ xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
+ }
+ if (dp2_flags) {
+ xfs_trans_ichgtime(tp, dp2, dp2_flags);
+ xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer replacements */
+ if (du1->ppargs) {
+ error = xfs_parent_replacename(tp, du1->ppargs, dp1, name1,
+ dp2, name2, ip1);
+ if (error)
+ return error;
+ }
+
+ if (du2->ppargs) {
+ error = xfs_parent_replacename(tp, du2->ppargs, dp2, name2,
+ dp1, name1, ip2);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished an exchange operation as
+ * follows: removed the source and target files from their directories;
+ * added the target to the source directory; and added the source to
+ * the target directory. All inodes are locked, so it's ok to model a
+ * rename this way so long as we say we deleted entries before we add
+ * new ones.
+ */
+ xfs_dir_update_hook(dp1, ip1, -1, name1);
+ xfs_dir_update_hook(dp2, ip2, -1, name2);
+ xfs_dir_update_hook(dp1, ip2, 1, name1);
+ xfs_dir_update_hook(dp2, ip1, 1, name2);
+ return 0;
+}
+
+/*
+ * Given an entry (@src_name, @src_ip) in directory @src_dp, make the entry
+ * @target_name in directory @target_dp point to @src_ip and remove the
+ * original entry, cleaning up everything left behind.
+ *
+ * Cleanup involves dropping a link count on @target_ip, and either removing
+ * the (@src_name, @src_ip) entry from @src_dp or simply replacing the entry
+ * with (@src_name, @wip) if a whiteout inode @wip is supplied.
+ *
+ * All inodes must have the ILOCK held. We assume that if @src_ip is a
+ * directory then its '..' doesn't already point to @target_dp, and that @wip
+ * is a freshly allocated whiteout.
+ */
+int
+xfs_dir_rename_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt,
+ unsigned int spaceres,
+ struct xfs_dir_update *du_wip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *src_dp = du_src->dp;
+ const struct xfs_name *src_name = du_src->name;
+ struct xfs_inode *src_ip = du_src->ip;
+ struct xfs_inode *target_dp = du_tgt->dp;
+ const struct xfs_name *target_name = du_tgt->name;
+ struct xfs_inode *target_ip = du_tgt->ip;
+ bool new_parent = (src_dp != target_dp);
+ bool src_is_directory;
+ int error;
+
+ src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
+
+ /*
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
+ */
+ if (target_ip == NULL) {
+ /*
+ * If there's no space reservation, check the entry will
+ * fit before actually inserting it.
+ */
+ if (!spaceres) {
+ error = xfs_dir_canenter(tp, target_dp, target_name);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * If target exists and it's a directory, check that whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2)))
+ return -EEXIST;
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. After this point, we have a real link, clear the tmpfile
+ * state flag from the inode so it doesn't accidentally get misused
+ * in future.
+ */
+ if (du_wip->ip) {
+ struct xfs_perag *pag;
+
+ ASSERT(VFS_I(du_wip->ip)->i_nlink == 0);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, du_wip->ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, du_wip->ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, du_wip->ip);
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
+ /*
+ * If target does not exist and the rename crosses
+ * directories, adjust the target directory link count
+ * to account for the ".." reference from the new entry.
+ */
+ error = xfs_dir_createname(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ if (new_parent && src_is_directory) {
+ xfs_bumplink(tp, target_dp);
+ }
+ } else { /* target_ip != NULL */
+ /*
+ * Link the source inode under the target name.
+ * If the source inode is a directory and we are moving
+ * it across directories, its ".." entry will be
+ * inconsistent until we replace that down below.
+ *
+ * In case there is already an entry with the same
+ * name at the destination directory, remove it first.
+ */
+ error = xfs_dir_replace(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /*
+ * Decrement the link count on the target since the target
+ * dir no longer points to it.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+
+ if (src_is_directory) {
+ /*
+ * Drop the link from the old "." entry.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+ }
+ } /* target_ip != NULL */
+
+ /*
+ * Remove the source.
+ */
+ if (new_parent && src_is_directory) {
+ /*
+ * Rewrite the ".." entry to point to the new
+ * directory.
+ */
+ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
+ target_dp->i_ino, spaceres);
+ ASSERT(error != -EEXIST);
+ if (error)
+ return error;
+ }
+
+ /*
+ * We always want to hit the ctime on the source inode.
+ *
+ * This isn't strictly required by the standards since the source
+ * inode isn't really being changed, but old unix file systems did
+ * it and some incremental backup programs won't work without it.
+ */
+ xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
+
+ /*
+ * Adjust the link count on src_dp. This is necessary when
+ * renaming a directory, either within one parent when
+ * the target existed, or across two parent directories.
+ */
+ if (src_is_directory && (new_parent || target_ip != NULL)) {
+
+ /*
+ * Decrement link count on src_directory since the
+ * entry that's moved no longer points to it.
+ */
+ error = xfs_droplink(tp, src_dp);
+ if (error)
+ return error;
+ }
+
+ /*
+ * For whiteouts, we only need to update the source dirent with the
+ * inode number of the whiteout inode rather than removing it
+ * altogether.
+ */
+ if (du_wip->ip)
+ error = xfs_dir_replace(tp, src_dp, src_name, du_wip->ip->i_ino,
+ spaceres);
+ else
+ error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+ spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+ if (new_parent)
+ xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer updates. */
+ if (du_wip->ppargs) {
+ error = xfs_parent_addname(tp, du_wip->ppargs, src_dp,
+ src_name, du_wip->ip);
+ if (error)
+ return error;
+ }
+
+ if (du_src->ppargs) {
+ error = xfs_parent_replacename(tp, du_src->ppargs, src_dp,
+ src_name, target_dp, target_name, src_ip);
+ if (error)
+ return error;
+ }
+
+ if (du_tgt->ppargs) {
+ error = xfs_parent_removename(tp, du_tgt->ppargs, target_dp,
+ target_name, target_ip);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished a rename operation as
+ * follows: removed the source and target files from their directories;
+ * that we've added the source to the target directory; and finally
+ * that we've added the whiteout, if there was one. All inodes are
+ * locked, so it's ok to model a rename this way so long as we say we
+ * deleted entries before we add new ones.
+ */
+ if (target_ip)
+ xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
+ xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
+ xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
+ if (du_wip->ip)
+ xfs_dir_update_hook(src_dp, du_wip->ip, 1, src_name);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 6dbe6e9ecb49..576068ed81fa 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -74,7 +74,7 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name);
+ const struct xfs_name *name);
int xfs_dir_lookup_args(struct xfs_da_args *args);
int xfs_dir_createname_args(struct xfs_da_args *args);
@@ -309,4 +309,51 @@ static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c)
return c;
}
+struct xfs_dir_update_params {
+ const struct xfs_inode *dp;
+ const struct xfs_inode *ip;
+ const struct xfs_name *name;
+ int delta;
+};
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
+ int delta, const struct xfs_name *name);
+
+struct xfs_dir_hook {
+ struct xfs_hook dirent_hook;
+};
+
+void xfs_dir_hook_disable(void);
+void xfs_dir_hook_enable(void);
+
+int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
+#else
+# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+struct xfs_parent_args;
+
+struct xfs_dir_update {
+ struct xfs_inode *dp;
+ const struct xfs_name *name;
+ struct xfs_inode *ip;
+ struct xfs_parent_args *ppargs;
+};
+
+int xfs_dir_create_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_add_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_remove_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+
+int xfs_dir_exchange_children(struct xfs_trans *tp, struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2, unsigned int spaceres);
+int xfs_dir_rename_children(struct xfs_trans *tp, struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt, unsigned int spaceres,
+ struct xfs_dir_update *du_wip);
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 61f51becff4f..e1bfee0c3b1a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -90,8 +90,7 @@ struct xfs_ifork;
#define XFSLABEL_MAX 12
/*
- * Superblock - in core version. Must match the ondisk version below.
- * Must be padded to 64 bit alignment.
+ * Superblock - in core version. Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
@@ -178,10 +177,8 @@ typedef struct xfs_sb {
/* must be padded to 64 bit alignment */
} xfs_sb_t;
-#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
-
/*
- * Superblock - on disk version. Must match the in core version above.
+ * Superblock - on disk version.
* Must be padded to 64 bit alignment.
*/
struct xfs_dsb {
@@ -265,6 +262,8 @@ struct xfs_dsb {
/* must be padded to 64 bit alignment */
};
+#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc)
+
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
* a feature bit is set when the flag is used.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 14c81f227c5b..f8d5ed7aedde 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1946,6 +1946,21 @@ retry:
}
return -ENOSPC;
}
+
+ /*
+ * Protect against obviously corrupt allocation btree records. Later
+ * xfs_iget checks will catch re-allocation of other active in-memory
+ * and on-disk inodes. If we don't catch reallocating the parent inode
+ * here we will deadlock in xfs_iget() so we have to do these checks
+ * first.
+ */
+ if (ino == parent || !xfs_verify_dir_ino(mp, ino)) {
+ xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
+ xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
+ XFS_SICK_AG_INOBT);
+ return -EFSCORRUPTED;
+ }
+
*new_ino = ino;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
new file mode 100644
index 000000000000..032333289113
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -0,0 +1,749 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include <linux/iversion.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_inode_util.h"
+#include "xfs_trans.h"
+#include "xfs_ialloc.h"
+#include "xfs_health.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_iunlink_item.h"
+#include "xfs_inode_item.h"
+
+uint16_t
+xfs_flags2diflags(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ /* can't set PREALLOC this way, just preserve it */
+ uint16_t di_flags =
+ (ip->i_diflags & XFS_DIFLAG_PREALLOC);
+
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (xflags & FS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (xflags & FS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (xflags & FS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (xflags & FS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if (xflags & FS_XFLAG_NODEFRAG)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (xflags & FS_XFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (xflags & FS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ if (xflags & FS_XFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_REALTIME)
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (xflags & FS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ }
+
+ return di_flags;
+}
+
+uint64_t
+xfs_flags2diflags2(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ uint64_t di_flags2 =
+ (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
+ XFS_DIFLAG2_BIGTIME |
+ XFS_DIFLAG2_NREXT64));
+
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+ if (xflags & FS_XFLAG_COWEXTSIZE)
+ di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+
+ return di_flags2;
+}
+
+uint32_t
+xfs_ip2xflags(
+ struct xfs_inode *ip)
+{
+ uint32_t flags = 0;
+
+ if (ip->i_diflags & XFS_DIFLAG_ANY) {
+ if (ip->i_diflags & XFS_DIFLAG_REALTIME)
+ flags |= FS_XFLAG_REALTIME;
+ if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
+ flags |= FS_XFLAG_PREALLOC;
+ if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
+ flags |= FS_XFLAG_IMMUTABLE;
+ if (ip->i_diflags & XFS_DIFLAG_APPEND)
+ flags |= FS_XFLAG_APPEND;
+ if (ip->i_diflags & XFS_DIFLAG_SYNC)
+ flags |= FS_XFLAG_SYNC;
+ if (ip->i_diflags & XFS_DIFLAG_NOATIME)
+ flags |= FS_XFLAG_NOATIME;
+ if (ip->i_diflags & XFS_DIFLAG_NODUMP)
+ flags |= FS_XFLAG_NODUMP;
+ if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ flags |= FS_XFLAG_RTINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ flags |= FS_XFLAG_PROJINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
+ flags |= FS_XFLAG_NOSYMLINKS;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
+ flags |= FS_XFLAG_EXTSIZE;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
+ flags |= FS_XFLAG_EXTSZINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
+ flags |= FS_XFLAG_NODEFRAG;
+ if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ flags |= FS_XFLAG_FILESTREAM;
+ }
+
+ if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
+ if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ flags |= FS_XFLAG_COWEXTSIZE;
+ }
+
+ if (xfs_inode_has_attr_fork(ip))
+ flags |= FS_XFLAG_HASATTR;
+ return flags;
+}
+
+prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+ if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ return dp->i_projid;
+
+ /* Assign to the root project by default. */
+ return 0;
+}
+
+/* Propagate di_flags from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ unsigned int di_flags = 0;
+ xfs_failaddr_t failaddr;
+ umode_t mode = VFS_I(ip)->i_mode;
+
+ if (S_ISDIR(mode)) {
+ if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_extsize = pip->i_extsize;
+ }
+ if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(mode)) {
+ if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ xfs_has_realtime(ip->i_mount))
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_extsize = pip->i_extsize;
+ }
+ }
+ if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
+ xfs_inherit_noatime)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
+ xfs_inherit_nodump)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
+ xfs_inherit_sync)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
+ xfs_inherit_nosymlinks)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
+ xfs_inherit_nodefrag)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+
+ ip->i_diflags |= di_flags;
+
+ /*
+ * Inode verifiers on older kernels only check that the extent size
+ * hint is an integer multiple of the rt extent size on realtime files.
+ * They did not check the hint alignment on a directory with both
+ * rtinherit and extszinherit flags set. If the misaligned hint is
+ * propagated from a directory into a new realtime file, new file
+ * allocations will fail due to math errors in the rt allocator and/or
+ * trip the verifiers. Validate the hint settings in the new file so
+ * that we don't let broken hints propagate.
+ */
+ failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+ VFS_I(ip)->i_mode, ip->i_diflags);
+ if (failaddr) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ }
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags2(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ xfs_failaddr_t failaddr;
+
+ if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = pip->i_cowextsize;
+ }
+ if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
+ ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+
+ /* Don't let invalid cowextsize hints propagate. */
+ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+ VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+ if (failaddr) {
+ ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = 0;
+ }
+}
+
+/*
+ * If we need to create attributes immediately after allocating the inode,
+ * initialise an empty attribute fork right now. We use the default fork offset
+ * for attributes here as we don't know exactly what size or how many
+ * attributes we might be adding. We can do this safely here because we know
+ * the data fork is completely empty and this saves us from needing to run a
+ * separate transaction to set the fork offset in the immediate future.
+ *
+ * If we have parent pointers and the caller hasn't told us that the file will
+ * never be linked into a directory tree, we /must/ create the attr fork.
+ */
+static inline bool
+xfs_icreate_want_attrfork(
+ struct xfs_mount *mp,
+ const struct xfs_icreate_args *args)
+{
+ if (args->flags & XFS_ICREATE_INIT_XATTRS)
+ return true;
+
+ if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp))
+ return true;
+
+ return false;
+}
+
+/* Initialise an inode's attributes. */
+void
+xfs_inode_init(
+ struct xfs_trans *tp,
+ const struct xfs_icreate_args *args,
+ struct xfs_inode *ip)
+{
+ struct xfs_inode *pip = args->pip;
+ struct inode *dir = pip ? VFS_I(pip) : NULL;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct inode *inode = VFS_I(ip);
+ unsigned int flags;
+ int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
+ XFS_ICHGTIME_ACCESS;
+
+ if (args->flags & XFS_ICREATE_TMPFILE)
+ set_nlink(inode, 0);
+ else if (S_ISDIR(args->mode))
+ set_nlink(inode, 2);
+ else
+ set_nlink(inode, 1);
+ inode->i_rdev = args->rdev;
+
+ if (!args->idmap || pip == NULL) {
+ /* creating a tree root, sb rooted, or detached file */
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ ip->i_projid = 0;
+ inode->i_mode = args->mode;
+ } else {
+ /* creating a child in the directory tree */
+ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
+ inode_fsuid_set(inode, args->idmap);
+ inode->i_gid = dir->i_gid;
+ inode->i_mode = args->mode;
+ } else {
+ inode_init_owner(args->idmap, inode, dir, args->mode);
+ }
+
+ /*
+ * If the group ID of the new file does not match the effective
+ * group ID or one of the supplementary group IDs, the S_ISGID
+ * bit is cleared (and only if the irix_sgid_inherit
+ * compatibility variable is set).
+ */
+ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
+ inode->i_mode &= ~S_ISGID;
+
+ ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0;
+ }
+
+ ip->i_disk_size = 0;
+ ip->i_df.if_nextents = 0;
+ ASSERT(ip->i_nblocks == 0);
+
+ ip->i_extsize = 0;
+ ip->i_diflags = 0;
+
+ if (xfs_has_v3inodes(mp)) {
+ inode_set_iversion(inode, 1);
+ ip->i_cowextsize = 0;
+ times |= XFS_ICHGTIME_CREATE;
+ }
+
+ xfs_trans_ichgtime(tp, ip, times);
+
+ flags = XFS_ILOG_CORE;
+ switch (args->mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ ip->i_df.if_format = XFS_DINODE_FMT_DEV;
+ flags |= XFS_ILOG_DEV;
+ break;
+ case S_IFREG:
+ case S_IFDIR:
+ if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
+ xfs_inode_inherit_flags(ip, pip);
+ if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
+ xfs_inode_inherit_flags2(ip, pip);
+ fallthrough;
+ case S_IFLNK:
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_df.if_bytes = 0;
+ ip->i_df.if_data = NULL;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ if (xfs_icreate_want_attrfork(mp, args)) {
+ ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
+ xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
+ }
+
+ xfs_trans_log_inode(tp, ip, flags);
+}
+
+/*
+ * In-Core Unlinked List Lookups
+ * =============================
+ *
+ * Every inode is supposed to be reachable from some other piece of metadata
+ * with the exception of the root directory. Inodes with a connection to a
+ * file descriptor but not linked from anywhere in the on-disk directory tree
+ * are collectively known as unlinked inodes, though the filesystem itself
+ * maintains links to these inodes so that on-disk metadata are consistent.
+ *
+ * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
+ * header contains a number of buckets that point to an inode, and each inode
+ * record has a pointer to the next inode in the hash chain. This
+ * singly-linked list causes scaling problems in the iunlink remove function
+ * because we must walk that list to find the inode that points to the inode
+ * being removed from the unlinked hash bucket list.
+ *
+ * Hence we keep an in-memory double linked list to link each inode on an
+ * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
+ * based lists would require having 64 list heads in the perag, one for each
+ * list. This is expensive in terms of memory (think millions of AGs) and cache
+ * misses on lookups. Instead, use the fact that inodes on the unlinked list
+ * must be referenced at the VFS level to keep them on the list and hence we
+ * have an existence guarantee for inodes on the unlinked list.
+ *
+ * Given we have an existence guarantee, we can use lockless inode cache lookups
+ * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
+ * for the double linked unlinked list, and we don't need any extra locking to
+ * keep the list safe as all manipulations are done under the AGI buffer lock.
+ * Keeping the list up to date does not require memory allocation, just finding
+ * the XFS inode and updating the next/prev unlinked list aginos.
+ */
+
+/*
+ * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
+ * is not in cache.
+ */
+static int
+xfs_iunlink_update_backref(
+ struct xfs_perag *pag,
+ xfs_agino_t prev_agino,
+ xfs_agino_t next_agino)
+{
+ struct xfs_inode *ip;
+
+ /* No update necessary if we are at the end of the list. */
+ if (next_agino == NULLAGINO)
+ return 0;
+
+ ip = xfs_iunlink_lookup(pag, next_agino);
+ if (!ip)
+ return -ENOLINK;
+
+ ip->i_prev_unlinked = prev_agino;
+ return 0;
+}
+
+/*
+ * Point the AGI unlinked bucket at an inode and log the results. The caller
+ * is responsible for validating the old value.
+ */
+STATIC int
+xfs_iunlink_update_bucket(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ unsigned int bucket_index,
+ xfs_agino_t new_agino)
+{
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t old_value;
+ int offset;
+
+ ASSERT(xfs_verify_agino_or_null(pag, new_agino));
+
+ old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
+ old_value, new_agino);
+
+ /*
+ * We should never find the head of the list already set to the value
+ * passed in because either we're adding or removing ourselves from the
+ * head of the list.
+ */
+ if (old_value == new_agino) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
+ offset = offsetof(struct xfs_agi, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket_index);
+ xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
+ return 0;
+}
+
+static int
+xfs_iunlink_insert_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t next_agino;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ /*
+ * Get the index into the agi hash table for the list this inode will
+ * go on. Make sure the pointer isn't garbage and that this inode
+ * isn't already on the list.
+ */
+ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (next_agino == agino ||
+ !xfs_verify_agino_or_null(pag, next_agino)) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Update the prev pointer in the next inode to point back to this
+ * inode.
+ */
+ error = xfs_iunlink_update_backref(pag, agino, next_agino);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
+ if (error)
+ return error;
+
+ if (next_agino != NULLAGINO) {
+ /*
+ * There is already another inode in the bucket, so point this
+ * inode to the current head of the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
+ if (error)
+ return error;
+ ip->i_next_unlinked = next_agino;
+ }
+
+ /* Point the head of the list to point to this inode. */
+ ip->i_prev_unlinked = NULLAGINO;
+ return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+}
+
+/*
+ * This is called when the inode's link count has gone to 0 or we are creating
+ * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
+ *
+ * We place the on-disk inode on a list in the AGI. It will be pulled from this
+ * list when the inode is freed.
+ */
+int
+xfs_iunlink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ struct xfs_buf *agibp;
+ int error;
+
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+ ASSERT(VFS_I(ip)->i_mode != 0);
+ trace_xfs_iunlink(ip);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
+ /* Get the agi buffer first. It ensures lock ordering on the list. */
+ error = xfs_read_agi(pag, tp, 0, &agibp);
+ if (error)
+ goto out;
+
+ error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
+out:
+ xfs_perag_put(pag);
+ return error;
+}
+
+static int
+xfs_iunlink_remove_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ xfs_agino_t head_agino;
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ trace_xfs_iunlink_remove(ip);
+
+ /*
+ * Get the index into the agi hash table for the list this inode will
+ * go on. Make sure the head pointer isn't garbage.
+ */
+ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (!xfs_verify_agino(pag, head_agino)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ agi, sizeof(*agi));
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Set our inode's next_unlinked pointer to NULL and then return
+ * the old pointer value so that we can update whatever was previous
+ * to us in the list to point to whatever was next in the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
+ if (error)
+ return error;
+
+ /*
+ * Update the prev pointer in the next inode to point back to previous
+ * inode in the chain.
+ */
+ error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error)
+ return error;
+
+ if (head_agino != agino) {
+ struct xfs_inode *prev_ip;
+
+ prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
+ if (!prev_ip) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
+ return -EFSCORRUPTED;
+ }
+
+ error = xfs_iunlink_log_inode(tp, prev_ip, pag,
+ ip->i_next_unlinked);
+ prev_ip->i_next_unlinked = ip->i_next_unlinked;
+ } else {
+ /* Point the head of the list to the next unlinked inode. */
+ error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
+ ip->i_next_unlinked);
+ }
+
+ ip->i_next_unlinked = NULLAGINO;
+ ip->i_prev_unlinked = 0;
+ return error;
+}
+
+/*
+ * Pull the on-disk inode from the AGI unlinked list.
+ */
+int
+xfs_iunlink_remove(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *ip)
+{
+ struct xfs_buf *agibp;
+ int error;
+
+ trace_xfs_iunlink_remove(ip);
+
+ /* Get the agi buffer first. It ensures lock ordering on the list. */
+ error = xfs_read_agi(pag, tp, 0, &agibp);
+ if (error)
+ return error;
+
+ return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
+}
+
+/*
+ * Decrement the link count on an inode & log the change. If this causes the
+ * link count to go to zero, move the inode to AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
+ */
+int
+xfs_droplink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ if (inode->i_nlink)
+ return 0;
+
+ return xfs_iunlink(tp, ip);
+}
+
+/*
+ * Increment the link count on an inode & log the change.
+ */
+void
+xfs_bumplink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Free an inode in the ondisk index and zero it out. */
+int
+xfs_inode_uninit(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *ip,
+ struct xfs_icluster *xic)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int error;
+
+ /*
+ * Free the inode first so that we guarantee that the AGI lock is going
+ * to be taken before we remove the inode from the unlinked list. This
+ * makes the AGI lock -> unlinked list modification order the same as
+ * used in O_TMPFILE creation.
+ */
+ error = xfs_difree(tp, pag, ip->i_ino, xic);
+ if (error)
+ return error;
+
+ error = xfs_iunlink_remove(tp, pag, ip);
+ if (error)
+ return error;
+
+ /*
+ * Free any local-format data sitting around before we reset the
+ * data fork to extents format. Note that the attr fork data has
+ * already been freed by xfs_attr_inactive.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ kfree(ip->i_df.if_data);
+ ip->i_df.if_data = NULL;
+ ip->i_df.if_bytes = 0;
+ }
+
+ VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
+ ip->i_diflags = 0;
+ ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
+ ip->i_forkoff = 0; /* mark the attr fork not in use */
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+
+ /*
+ * Bump the generation count so no one will be confused
+ * by reincarnations of this inode.
+ */
+ VFS_I(ip)->i_generation++;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_util.h b/fs/xfs/libxfs/xfs_inode_util.h
new file mode 100644
index 000000000000..060242998a23
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_INODE_UTIL_H__
+#define __XFS_INODE_UTIL_H__
+
+struct xfs_icluster;
+
+uint16_t xfs_flags2diflags(struct xfs_inode *ip, unsigned int xflags);
+uint64_t xfs_flags2diflags2(struct xfs_inode *ip, unsigned int xflags);
+uint32_t xfs_dic2xflags(struct xfs_inode *ip);
+uint32_t xfs_ip2xflags(struct xfs_inode *ip);
+
+prid_t xfs_get_initial_prid(struct xfs_inode *dp);
+
+/*
+ * File creation context.
+ *
+ * Due to our only partial reliance on the VFS to propagate uid and gid values
+ * according to accepted Unix behaviors, callers must initialize idmap to the
+ * correct idmapping structure to get the correct inheritance behaviors when
+ * XFS_MOUNT_GRPID is set.
+ *
+ * To create files detached from the directory tree (e.g. quota inodes), set
+ * idmap to NULL. To create a tree root, set pip to NULL.
+ */
+struct xfs_icreate_args {
+ struct mnt_idmap *idmap;
+ struct xfs_inode *pip; /* parent inode or null */
+ dev_t rdev;
+ umode_t mode;
+
+#define XFS_ICREATE_TMPFILE (1U << 0) /* create an unlinked file */
+#define XFS_ICREATE_INIT_XATTRS (1U << 1) /* will set xattrs immediately */
+#define XFS_ICREATE_UNLINKABLE (1U << 2) /* cannot link into dir tree */
+ uint16_t flags;
+};
+
+/*
+ * Flags for xfs_trans_ichgtime().
+ */
+#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
+#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
+#define XFS_ICHGTIME_ACCESS 0x8 /* last access timestamp */
+void xfs_trans_ichgtime(struct xfs_trans *tp, struct xfs_inode *ip, int flags);
+
+void xfs_inode_init(struct xfs_trans *tp, const struct xfs_icreate_args *args,
+ struct xfs_inode *ip);
+
+int xfs_inode_uninit(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip, struct xfs_icluster *xic);
+
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip);
+int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip);
+void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip);
+
+#endif /* __XFS_INODE_UTIL_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index e8cdd77d03fa..23c133fd36f5 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -85,6 +85,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12);
*/
+ XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 34f104ed372c..2f7413afbf46 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -177,13 +177,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_REFC_BTREE_REF 1
#define XFS_SSB_REF 0
-/*
- * Flags for xfs_trans_ichgtime().
- */
-#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
-#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
-
/* Computed inode geometry for the filesystem. */
struct xfs_ino_geometry {
/* Maximum inode count in this filesystem. */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 69fc5b981352..3c40f37e82c7 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -68,6 +68,8 @@ xfs_trans_ichgtime(
inode_set_mtime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CHG)
inode_set_ctime_to_ts(inode, tv);
+ if (flags & XFS_ICHGTIME_ACCESS)
+ inode_set_atime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CREATE)
ip->i_crtime = tv;
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 1ad8ec63a7f4..22f5f1a9d3f0 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -26,6 +26,7 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
+#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index b747b625c5ee..d390d56cd875 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -40,11 +40,16 @@ xrep_tempfile_create(
struct xfs_scrub *sc,
uint16_t mode)
{
+ struct xfs_icreate_args args = {
+ .pip = sc->mp->m_rootip,
+ .mode = mode,
+ .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
+ };
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = NULL;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
struct xfs_inode *dp = mp->m_rootip;
xfs_ino_t ino;
@@ -65,8 +70,7 @@ xrep_tempfile_create(
* inode should be completely root owned so that we don't fail due to
* quota limits.
*/
- error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
- XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
+ error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -87,14 +91,11 @@ xrep_tempfile_create(
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (error)
goto out_trans_cancel;
- error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
- 0, false, &sc->tempip);
+ error = xfs_icreate(tp, ino, &args, &sc->tempip);
if (error)
goto out_trans_cancel;
- /* Change the ownership of the inode to root. */
- VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
- VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
+ /* We don't touch file data, so drop the realtime flags. */
sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index f6ffb4f248f7..9355ccad9503 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -10,6 +10,10 @@
#define DEBUG 1
#endif
+#ifdef CONFIG_XFS_DEBUG_EXPENSIVE
+#define DEBUG_EXPENSIVE 1
+#endif
+
#ifdef CONFIG_XFS_ASSERT_FATAL
#define XFS_ASSERT_FATAL 1
#endif
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 43031842341a..47549cfa61cd 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_error.h"
struct kmem_cache *xfs_buf_item_cache;
@@ -781,8 +782,39 @@ xfs_buf_item_committed(
return lsn;
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_buf_item_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ if (!bp->b_ops || !bp->b_ops->verify_struct)
+ return 0;
+ if (bip->bli_flags & XFS_BLI_STALE)
+ return 0;
+
+ fa = bp->b_ops->verify_struct(bp);
+ if (fa) {
+ xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name,
+ bp->b_addr, BBTOB(bp->b_length), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_buf_item_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
+ .iop_precommit = xfs_buf_item_precommit,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 6a1aae799cf1..7d19091215b0 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -17,6 +17,7 @@
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_log.h"
+#include "xfs_error.h"
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
{
@@ -193,8 +194,38 @@ xfs_qm_dquot_logitem_committing(
return xfs_qm_dquot_logitem_release(lip);
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_qm_dquot_logitem_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_disk_dquot ddq = { };
+ xfs_failaddr_t fa;
+
+ xfs_dquot_to_disk(&ddq, dqp);
+ fa = xfs_dquot_verify(mp, &ddq, dqp->q_id);
+ if (fa) {
+ XFS_CORRUPTION_ERROR("Bad dquot during logging",
+ XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+ fa, dqp->q_id);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_qm_dquot_logitem_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
+ .iop_precommit = xfs_qm_dquot_logitem_precommit,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
.iop_unpin = xfs_qm_dquot_logitem_unpin,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a4e3cd8971fc..62ca6c75117c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,55 +42,11 @@
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
-#include "xfs_sb.h"
+#include "xfs_inode_util.h"
struct kmem_cache *xfs_inode_cache;
/*
- * helper function to extract extent size hint from inode
- */
-xfs_extlen_t
-xfs_get_extsz_hint(
- struct xfs_inode *ip)
-{
- /*
- * No point in aligning allocations if we need to COW to actually
- * write to them.
- */
- if (xfs_is_always_cow_inode(ip))
- return 0;
- if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
- return ip->i_extsize;
- if (XFS_IS_REALTIME_INODE(ip) &&
- ip->i_mount->m_sb.sb_rextsize > 1)
- return ip->i_mount->m_sb.sb_rextsize;
- return 0;
-}
-
-/*
- * Helper function to extract CoW extent size hint from inode.
- * Between the extent size hint and the CoW extent size hint, we
- * return the greater of the two. If the value is zero (automatic),
- * use the default size.
- */
-xfs_extlen_t
-xfs_get_cowextsz_hint(
- struct xfs_inode *ip)
-{
- xfs_extlen_t a, b;
-
- a = 0;
- if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
- a = ip->i_cowextsize;
- b = xfs_get_extsz_hint(ip);
-
- a = max(a, b);
- if (a == 0)
- return XFS_DEFAULT_COWEXTSZ_HINT;
- return a;
-}
-
-/*
* These two are wrapper routines around the xfs_ilock() routine used to
* centralize some grungy code. They are used in places that wish to lock the
* inode solely for reading the extents. The reason these places can't just
@@ -567,55 +523,6 @@ xfs_lock_two_inodes(
}
}
-uint
-xfs_ip2xflags(
- struct xfs_inode *ip)
-{
- uint flags = 0;
-
- if (ip->i_diflags & XFS_DIFLAG_ANY) {
- if (ip->i_diflags & XFS_DIFLAG_REALTIME)
- flags |= FS_XFLAG_REALTIME;
- if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
- flags |= FS_XFLAG_PREALLOC;
- if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
- flags |= FS_XFLAG_IMMUTABLE;
- if (ip->i_diflags & XFS_DIFLAG_APPEND)
- flags |= FS_XFLAG_APPEND;
- if (ip->i_diflags & XFS_DIFLAG_SYNC)
- flags |= FS_XFLAG_SYNC;
- if (ip->i_diflags & XFS_DIFLAG_NOATIME)
- flags |= FS_XFLAG_NOATIME;
- if (ip->i_diflags & XFS_DIFLAG_NODUMP)
- flags |= FS_XFLAG_NODUMP;
- if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
- flags |= FS_XFLAG_RTINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
- flags |= FS_XFLAG_PROJINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
- flags |= FS_XFLAG_NOSYMLINKS;
- if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
- flags |= FS_XFLAG_EXTSIZE;
- if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= FS_XFLAG_EXTSZINHERIT;
- if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
- flags |= FS_XFLAG_NODEFRAG;
- if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
- flags |= FS_XFLAG_FILESTREAM;
- }
-
- if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
- if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
- flags |= FS_XFLAG_DAX;
- if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
- flags |= FS_XFLAG_COWEXTSIZE;
- }
-
- if (xfs_inode_has_attr_fork(ip))
- flags |= FS_XFLAG_HASATTR;
- return flags;
-}
-
/*
* Lookups up an inode from "name". If ci_name is not NULL, then a CI match
* is allowed, otherwise it has to be an exact match. If a CI match is found,
@@ -657,97 +564,6 @@ out_unlock:
return error;
}
-/* Propagate di_flags from a parent inode to a child inode. */
-static void
-xfs_inode_inherit_flags(
- struct xfs_inode *ip,
- const struct xfs_inode *pip)
-{
- unsigned int di_flags = 0;
- xfs_failaddr_t failaddr;
- umode_t mode = VFS_I(ip)->i_mode;
-
- if (S_ISDIR(mode)) {
- if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- ip->i_extsize = pip->i_extsize;
- }
- if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(mode)) {
- if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- xfs_has_realtime(ip->i_mount))
- di_flags |= XFS_DIFLAG_REALTIME;
- if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
- di_flags |= XFS_DIFLAG_EXTSIZE;
- ip->i_extsize = pip->i_extsize;
- }
- }
- if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
- xfs_inherit_noatime)
- di_flags |= XFS_DIFLAG_NOATIME;
- if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
- xfs_inherit_nodump)
- di_flags |= XFS_DIFLAG_NODUMP;
- if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
- xfs_inherit_sync)
- di_flags |= XFS_DIFLAG_SYNC;
- if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
- xfs_inherit_nosymlinks)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
- xfs_inherit_nodefrag)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
-
- ip->i_diflags |= di_flags;
-
- /*
- * Inode verifiers on older kernels only check that the extent size
- * hint is an integer multiple of the rt extent size on realtime files.
- * They did not check the hint alignment on a directory with both
- * rtinherit and extszinherit flags set. If the misaligned hint is
- * propagated from a directory into a new realtime file, new file
- * allocations will fail due to math errors in the rt allocator and/or
- * trip the verifiers. Validate the hint settings in the new file so
- * that we don't let broken hints propagate.
- */
- failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
- VFS_I(ip)->i_mode, ip->i_diflags);
- if (failaddr) {
- ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
- XFS_DIFLAG_EXTSZINHERIT);
- ip->i_extsize = 0;
- }
-}
-
-/* Propagate di_flags2 from a parent inode to a child inode. */
-static void
-xfs_inode_inherit_flags2(
- struct xfs_inode *ip,
- const struct xfs_inode *pip)
-{
- xfs_failaddr_t failaddr;
-
- if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
- ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
- ip->i_cowextsize = pip->i_cowextsize;
- }
- if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
- ip->i_diflags2 |= XFS_DIFLAG2_DAX;
-
- /* Don't let invalid cowextsize hints propagate. */
- failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
- VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
- if (failaddr) {
- ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
- ip->i_cowextsize = 0;
- }
-}
-
/*
* Initialise a newly allocated inode and return the in-core inode to the
* caller locked exclusively.
@@ -755,39 +571,15 @@ xfs_inode_inherit_flags2(
* Caller is responsible for unlocking the inode manually upon return
*/
int
-xfs_init_new_inode(
- struct mnt_idmap *idmap,
+xfs_icreate(
struct xfs_trans *tp,
- struct xfs_inode *pip,
xfs_ino_t ino,
- umode_t mode,
- xfs_nlink_t nlink,
- dev_t rdev,
- prid_t prid,
- bool init_xattrs,
+ const struct xfs_icreate_args *args,
struct xfs_inode **ipp)
{
- struct inode *dir = pip ? VFS_I(pip) : NULL;
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_inode *ip;
- unsigned int flags;
+ struct xfs_inode *ip = NULL;
int error;
- struct timespec64 tv;
- struct inode *inode;
-
- /*
- * Protect against obviously corrupt allocation btree records. Later
- * xfs_iget checks will catch re-allocation of other active in-memory
- * and on-disk inodes. If we don't catch reallocating the parent inode
- * here we will deadlock in xfs_iget() so we have to do these checks
- * first.
- */
- if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) {
- xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
- xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
- XFS_SICK_AG_INOBT);
- return -EFSCORRUPTED;
- }
/*
* Get the in-core inode with the lock held exclusively to prevent
@@ -798,96 +590,8 @@ xfs_init_new_inode(
return error;
ASSERT(ip != NULL);
- inode = VFS_I(ip);
- set_nlink(inode, nlink);
- inode->i_rdev = rdev;
- ip->i_projid = prid;
-
- if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
- inode_fsuid_set(inode, idmap);
- inode->i_gid = dir->i_gid;
- inode->i_mode = mode;
- } else {
- inode_init_owner(idmap, inode, dir, mode);
- }
-
- /*
- * If the group ID of the new file does not match the effective group
- * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
- * (and only if the irix_sgid_inherit compatibility variable is set).
- */
- if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
- !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)))
- inode->i_mode &= ~S_ISGID;
-
- ip->i_disk_size = 0;
- ip->i_df.if_nextents = 0;
- ASSERT(ip->i_nblocks == 0);
-
- tv = inode_set_ctime_current(inode);
- inode_set_mtime_to_ts(inode, tv);
- inode_set_atime_to_ts(inode, tv);
-
- ip->i_extsize = 0;
- ip->i_diflags = 0;
-
- if (xfs_has_v3inodes(mp)) {
- inode_set_iversion(inode, 1);
- ip->i_cowextsize = 0;
- ip->i_crtime = tv;
- }
-
- flags = XFS_ILOG_CORE;
- switch (mode & S_IFMT) {
- case S_IFIFO:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFSOCK:
- ip->i_df.if_format = XFS_DINODE_FMT_DEV;
- flags |= XFS_ILOG_DEV;
- break;
- case S_IFREG:
- case S_IFDIR:
- if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
- xfs_inode_inherit_flags(ip, pip);
- if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
- xfs_inode_inherit_flags2(ip, pip);
- fallthrough;
- case S_IFLNK:
- ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
- ip->i_df.if_bytes = 0;
- ip->i_df.if_data = NULL;
- break;
- default:
- ASSERT(0);
- }
-
- /*
- * If we need to create attributes immediately after allocating the
- * inode, initialise an empty attribute fork right now. We use the
- * default fork offset for attributes here as we don't know exactly what
- * size or how many attributes we might be adding. We can do this
- * safely here because we know the data fork is completely empty and
- * this saves us from needing to run a separate transaction to set the
- * fork offset in the immediate future.
- */
- if (init_xattrs) {
- ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
- xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
-
- if (!xfs_has_attr(mp)) {
- spin_lock(&mp->m_sb_lock);
- xfs_add_attr(mp);
- spin_unlock(&mp->m_sb_lock);
- xfs_log_sb(tp);
- }
- }
-
- /*
- * Log the new values stuffed into the inode.
- */
xfs_trans_ijoin(tp, ip, 0);
- xfs_trans_log_inode(tp, ip, flags);
+ xfs_inode_init(tp, args, ip);
/* now that we have an i_mode we can setup the inode structure */
xfs_setup_inode(ip);
@@ -896,158 +600,60 @@ xfs_init_new_inode(
return 0;
}
-/*
- * Decrement the link count on an inode & log the change. If this causes the
- * link count to go to zero, move the inode to AGI unlinked list so that it can
- * be freed when the last active reference goes away via xfs_inactive().
- */
+/* Return dquots for the ids that will be assigned to a new file. */
int
-xfs_droplink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
- if (inode->i_nlink == 0) {
- xfs_info_ratelimited(tp->t_mountp,
- "Inode 0x%llx link count dropped below zero. Pinning link count.",
- ip->i_ino);
- set_nlink(inode, XFS_NLINK_PINNED);
- }
- if (inode->i_nlink != XFS_NLINK_PINNED)
- drop_nlink(inode);
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- if (inode->i_nlink)
- return 0;
-
- return xfs_iunlink(tp, ip);
-}
-
-/*
- * Increment the link count on an inode & log the change.
- */
-void
-xfs_bumplink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
- if (inode->i_nlink == XFS_NLINK_PINNED - 1)
- xfs_info_ratelimited(tp->t_mountp,
- "Inode 0x%llx link count exceeded maximum. Pinning link count.",
- ip->i_ino);
- if (inode->i_nlink != XFS_NLINK_PINNED)
- inc_nlink(inode);
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-}
-
-#ifdef CONFIG_XFS_LIVE_HOOKS
-/*
- * Use a static key here to reduce the overhead of directory live update hooks.
- * If the compiler supports jump labels, the static branch will be replaced by
- * a nop sled when there are no hook users. Online fsck is currently the only
- * caller, so this is a reasonable tradeoff.
- *
- * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
- * parts of the kernel allocate memory with that lock held, which means that
- * XFS callers cannot hold any locks that might be used by memory reclaim or
- * writeback when calling the static_branch_{inc,dec} functions.
- */
-DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
-
-void
-xfs_dir_hook_disable(void)
-{
- xfs_hooks_switch_off(&xfs_dir_hooks_switch);
-}
-
-void
-xfs_dir_hook_enable(void)
-{
- xfs_hooks_switch_on(&xfs_dir_hooks_switch);
-}
-
-/* Call hooks for a directory update relating to a child dirent update. */
-inline void
-xfs_dir_update_hook(
- struct xfs_inode *dp,
- struct xfs_inode *ip,
- int delta,
- const struct xfs_name *name)
-{
- if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
- struct xfs_dir_update_params p = {
- .dp = dp,
- .ip = ip,
- .delta = delta,
- .name = name,
- };
- struct xfs_mount *mp = ip->i_mount;
-
- xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
+xfs_icreate_dqalloc(
+ const struct xfs_icreate_args *args,
+ struct xfs_dquot **udqpp,
+ struct xfs_dquot **gdqpp,
+ struct xfs_dquot **pdqpp)
+{
+ struct inode *dir = VFS_I(args->pip);
+ kuid_t uid = GLOBAL_ROOT_UID;
+ kgid_t gid = GLOBAL_ROOT_GID;
+ prid_t prid = 0;
+ unsigned int flags = XFS_QMOPT_QUOTALL;
+
+ if (args->idmap) {
+ /*
+ * The uid/gid computation code must match what the VFS uses to
+ * assign i_[ug]id. INHERIT adjusts the gid computation for
+ * setgid/grpid systems.
+ */
+ uid = mapped_fsuid(args->idmap, i_user_ns(dir));
+ gid = mapped_fsgid(args->idmap, i_user_ns(dir));
+ prid = xfs_get_initial_prid(args->pip);
+ flags |= XFS_QMOPT_INHERIT;
}
-}
-/* Call the specified function during a directory update. */
-int
-xfs_dir_hook_add(
- struct xfs_mount *mp,
- struct xfs_dir_hook *hook)
-{
- return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
-}
+ *udqpp = *gdqpp = *pdqpp = NULL;
-/* Stop calling the specified function during a directory update. */
-void
-xfs_dir_hook_del(
- struct xfs_mount *mp,
- struct xfs_dir_hook *hook)
-{
- xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
+ return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp,
+ gdqpp, pdqpp);
}
-/* Configure directory update hook functions. */
-void
-xfs_dir_hook_setup(
- struct xfs_dir_hook *hook,
- notifier_fn_t mod_fn)
-{
- xfs_hook_setup(&hook->dirent_hook, mod_fn);
-}
-#endif /* CONFIG_XFS_LIVE_HOOKS */
-
int
xfs_create(
- struct mnt_idmap *idmap,
- struct xfs_inode *dp,
+ const struct xfs_icreate_args *args,
struct xfs_name *name,
- umode_t mode,
- dev_t rdev,
- bool init_xattrs,
- xfs_inode_t **ipp)
+ struct xfs_inode **ipp)
{
- int is_dir = S_ISDIR(mode);
+ struct xfs_inode *dp = args->pip;
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = name,
+ };
struct xfs_mount *mp = dp->i_mount;
- struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
- int error;
- bool unlock_dp_on_error = false;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
- uint resblks;
xfs_ino_t ino;
- struct xfs_parent_args *ppargs;
+ bool unlock_dp_on_error = false;
+ bool is_dir = S_ISDIR(args->mode);
+ uint resblks;
+ int error;
trace_xfs_create(dp, name);
@@ -1056,15 +662,8 @@ xfs_create(
if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
return -EIO;
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -1076,7 +675,7 @@ xfs_create(
tres = &M_RES(mp)->tr_create;
}
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto out_release_dquots;
@@ -1105,10 +704,9 @@ xfs_create(
* entry pointing to them, but a directory also the "." entry
* pointing to itself.
*/
- error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
- is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip);
+ error = xfs_icreate(tp, ino, args, &du.ip);
if (error)
goto out_trans_cancel;
@@ -1121,38 +719,9 @@ xfs_create(
*/
xfs_trans_ijoin(tp, dp, 0);
- error = xfs_dir_createname(tp, dp, name, ip->i_ino,
- resblks - XFS_IALLOC_SPACE_RES(mp));
- if (error) {
- ASSERT(error != -ENOSPC);
+ error = xfs_dir_create_child(tp, resblks, &du);
+ if (error)
goto out_trans_cancel;
- }
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
- if (is_dir) {
- error = xfs_dir_init(tp, ip, dp);
- if (error)
- goto out_trans_cancel;
-
- xfs_bumplink(tp, dp);
- }
-
- /*
- * If we have parent pointers, we need to add the attribute containing
- * the parent information now.
- */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, dp, name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * Create ip with a reference from dp, and add '.' and '..' references
- * if it's a directory.
- */
- xfs_dir_update_hook(dp, ip, 1, name);
/*
* If this is a synchronous mount, make sure that the
@@ -1167,7 +736,7 @@ xfs_create(
* These ids of the inode couldn't have changed since the new
* inode has been locked ever since it was created.
*/
- xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+ xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
error = xfs_trans_commit(tp);
if (error)
@@ -1177,10 +746,10 @@ xfs_create(
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
- *ipp = ip;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ *ipp = du.ip;
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -1191,13 +760,13 @@ xfs_create(
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_finish_inode_setup(ip);
- xfs_irele(ip);
+ if (du.ip) {
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
+ xfs_finish_inode_setup(du.ip);
+ xfs_irele(du.ip);
}
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
@@ -1210,36 +779,28 @@ xfs_create(
int
xfs_create_tmpfile(
- struct mnt_idmap *idmap,
- struct xfs_inode *dp,
- umode_t mode,
- bool init_xattrs,
+ const struct xfs_icreate_args *args,
struct xfs_inode **ipp)
{
+ struct xfs_inode *dp = args->pip;
struct xfs_mount *mp = dp->i_mount;
struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
- int error;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
struct xfs_trans_res *tres;
- uint resblks;
xfs_ino_t ino;
+ uint resblks;
+ int error;
+
+ ASSERT(args->flags & XFS_ICREATE_TMPFILE);
if (xfs_is_shutdown(mp))
return -EIO;
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -1251,10 +812,9 @@ xfs_create_tmpfile(
if (error)
goto out_release_dquots;
- error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
- 0, 0, prid, init_xattrs, &ip);
+ error = xfs_icreate(tp, ino, args, &ip);
if (error)
goto out_trans_cancel;
@@ -1311,11 +871,15 @@ xfs_link(
struct xfs_inode *sip,
struct xfs_name *target_name)
{
+ struct xfs_dir_update du = {
+ .dp = tdp,
+ .name = target_name,
+ .ip = sip,
+ };
struct xfs_mount *mp = tdp->i_mount;
struct xfs_trans *tp;
int error, nospace_error = 0;
int resblks;
- struct xfs_parent_args *ppargs;
trace_xfs_link(tdp, target_name);
@@ -1334,7 +898,7 @@ xfs_link(
if (error)
goto std_return;
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto std_return;
@@ -1349,7 +913,7 @@ xfs_link(
* pointers are enabled because we can't back out if the xattrs must
* grow.
*/
- if (ppargs && nospace_error) {
+ if (du.ppargs && nospace_error) {
error = nospace_error;
goto error_return;
}
@@ -1376,47 +940,9 @@ xfs_link(
}
}
- if (!resblks) {
- error = xfs_dir_canenter(tp, tdp, target_name);
- if (error)
- goto error_return;
- }
-
- /*
- * Handle initial link state of O_TMPFILE inode
- */
- if (VFS_I(sip)->i_nlink == 0) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino));
- error = xfs_iunlink_remove(tp, pag, sip);
- xfs_perag_put(pag);
- if (error)
- goto error_return;
- }
-
- error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
- resblks);
+ error = xfs_dir_add_child(tp, resblks, &du);
if (error)
goto error_return;
- xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
-
- xfs_bumplink(tp, sip);
-
- /*
- * If we have parent pointers, we now need to add the parent record to
- * the attribute fork of the inode. If this is the initial parent
- * attribute, we need to create it correctly, otherwise we can just add
- * the parent to the inode.
- */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip);
- if (error)
- goto error_return;
- }
-
- xfs_dir_update_hook(tdp, sip, 1, target_name);
/*
* If this is a synchronous mount, make sure that the
@@ -1429,7 +955,7 @@ xfs_link(
error = xfs_trans_commit(tp);
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return error;
error_return:
@@ -1437,7 +963,7 @@ xfs_link(
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
std_return:
if (error == -ENOSPC && nospace_error)
error = nospace_error;
@@ -2024,39 +1550,6 @@ out:
}
/*
- * In-Core Unlinked List Lookups
- * =============================
- *
- * Every inode is supposed to be reachable from some other piece of metadata
- * with the exception of the root directory. Inodes with a connection to a
- * file descriptor but not linked from anywhere in the on-disk directory tree
- * are collectively known as unlinked inodes, though the filesystem itself
- * maintains links to these inodes so that on-disk metadata are consistent.
- *
- * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
- * header contains a number of buckets that point to an inode, and each inode
- * record has a pointer to the next inode in the hash chain. This
- * singly-linked list causes scaling problems in the iunlink remove function
- * because we must walk that list to find the inode that points to the inode
- * being removed from the unlinked hash bucket list.
- *
- * Hence we keep an in-memory double linked list to link each inode on an
- * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
- * based lists would require having 64 list heads in the perag, one for each
- * list. This is expensive in terms of memory (think millions of AGs) and cache
- * misses on lookups. Instead, use the fact that inodes on the unlinked list
- * must be referenced at the VFS level to keep them on the list and hence we
- * have an existence guarantee for inodes on the unlinked list.
- *
- * Given we have an existence guarantee, we can use lockless inode cache lookups
- * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
- * for the double linked unlinked list, and we don't need any extra locking to
- * keep the list safe as all manipulations are done under the AGI buffer lock.
- * Keeping the list up to date does not require memory allocation, just finding
- * the XFS inode and updating the next/prev unlinked list aginos.
- */
-
-/*
* Find an inode on the unlinked list. This does not take references to the
* inode as we have existence guarantees by holding the AGI buffer lock and that
* only unlinked, referenced inodes can be on the unlinked inode list. If we
@@ -2091,75 +1584,11 @@ xfs_iunlink_lookup(
}
/*
- * Update the prev pointer of the next agino. Returns -ENOLINK if the inode
- * is not in cache.
- */
-static int
-xfs_iunlink_update_backref(
- struct xfs_perag *pag,
- xfs_agino_t prev_agino,
- xfs_agino_t next_agino)
-{
- struct xfs_inode *ip;
-
- /* No update necessary if we are at the end of the list. */
- if (next_agino == NULLAGINO)
- return 0;
-
- ip = xfs_iunlink_lookup(pag, next_agino);
- if (!ip)
- return -ENOLINK;
-
- ip->i_prev_unlinked = prev_agino;
- return 0;
-}
-
-/*
- * Point the AGI unlinked bucket at an inode and log the results. The caller
- * is responsible for validating the old value.
- */
-STATIC int
-xfs_iunlink_update_bucket(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- unsigned int bucket_index,
- xfs_agino_t new_agino)
-{
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t old_value;
- int offset;
-
- ASSERT(xfs_verify_agino_or_null(pag, new_agino));
-
- old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
- old_value, new_agino);
-
- /*
- * We should never find the head of the list already set to the value
- * passed in because either we're adding or removing ourselves from the
- * head of the list.
- */
- if (old_value == new_agino) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
- offset = offsetof(struct xfs_agi, agi_unlinked) +
- (sizeof(xfs_agino_t) * bucket_index);
- xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
- return 0;
-}
-
-/*
* Load the inode @next_agino into the cache and set its prev_unlinked pointer
* to @prev_agino. Caller must hold the AGI to synchronize with other changes
* to the unlinked list.
*/
-STATIC int
+int
xfs_iunlink_reload_next(
struct xfs_trans *tp,
struct xfs_buf *agibp,
@@ -2215,187 +1644,6 @@ rele:
return error;
}
-static int
-xfs_iunlink_insert_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t next_agino;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the pointer isn't garbage and that this inode
- * isn't already on the list.
- */
- next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (next_agino == agino ||
- !xfs_verify_agino_or_null(pag, next_agino)) {
- xfs_buf_mark_corrupt(agibp);
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Update the prev pointer in the next inode to point back to this
- * inode.
- */
- error = xfs_iunlink_update_backref(pag, agino, next_agino);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
- if (error)
- return error;
-
- if (next_agino != NULLAGINO) {
- /*
- * There is already another inode in the bucket, so point this
- * inode to the current head of the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
- if (error)
- return error;
- ip->i_next_unlinked = next_agino;
- }
-
- /* Point the head of the list to point to this inode. */
- ip->i_prev_unlinked = NULLAGINO;
- return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
-}
-
-/*
- * This is called when the inode's link count has gone to 0 or we are creating
- * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
- *
- * We place the on-disk inode on a list in the AGI. It will be pulled from this
- * list when the inode is freed.
- */
-int
-xfs_iunlink(
- struct xfs_trans *tp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
- struct xfs_buf *agibp;
- int error;
-
- ASSERT(VFS_I(ip)->i_nlink == 0);
- ASSERT(VFS_I(ip)->i_mode != 0);
- trace_xfs_iunlink(ip);
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- goto out;
-
- error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
-out:
- xfs_perag_put(pag);
- return error;
-}
-
-static int
-xfs_iunlink_remove_inode(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_buf *agibp,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_agi *agi = agibp->b_addr;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
- xfs_agino_t head_agino;
- short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /*
- * Get the index into the agi hash table for the list this inode will
- * go on. Make sure the head pointer isn't garbage.
- */
- head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- if (!xfs_verify_agino(pag, head_agino)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- agi, sizeof(*agi));
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
- return -EFSCORRUPTED;
- }
-
- /*
- * Set our inode's next_unlinked pointer to NULL and then return
- * the old pointer value so that we can update whatever was previous
- * to us in the list to point to whatever was next in the list.
- */
- error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
- if (error)
- return error;
-
- /*
- * Update the prev pointer in the next inode to point back to previous
- * inode in the chain.
- */
- error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error == -ENOLINK)
- error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
- ip->i_next_unlinked);
- if (error)
- return error;
-
- if (head_agino != agino) {
- struct xfs_inode *prev_ip;
-
- prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
- if (!prev_ip) {
- xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
- return -EFSCORRUPTED;
- }
-
- error = xfs_iunlink_log_inode(tp, prev_ip, pag,
- ip->i_next_unlinked);
- prev_ip->i_next_unlinked = ip->i_next_unlinked;
- } else {
- /* Point the head of the list to the next unlinked inode. */
- error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
- ip->i_next_unlinked);
- }
-
- ip->i_next_unlinked = NULLAGINO;
- ip->i_prev_unlinked = 0;
- return error;
-}
-
-/*
- * Pull the on-disk inode from the AGI unlinked list.
- */
-int
-xfs_iunlink_remove(
- struct xfs_trans *tp,
- struct xfs_perag *pag,
- struct xfs_inode *ip)
-{
- struct xfs_buf *agibp;
- int error;
-
- trace_xfs_iunlink_remove(ip);
-
- /* Get the agi buffer first. It ensures lock ordering on the list. */
- error = xfs_read_agi(pag, tp, 0, &agibp);
- if (error)
- return error;
-
- return xfs_iunlink_remove_inode(tp, pag, agibp, ip);
-}
-
/*
* Look up the inode number specified and if it is not already marked XFS_ISTALE
* mark it stale. We should only find clean inodes in this lookup that aren't
@@ -2614,36 +1862,10 @@ xfs_ifree(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- /*
- * Free the inode first so that we guarantee that the AGI lock is going
- * to be taken before we remove the inode from the unlinked list. This
- * makes the AGI lock -> unlinked list modification order the same as
- * used in O_TMPFILE creation.
- */
- error = xfs_difree(tp, pag, ip->i_ino, &xic);
- if (error)
- goto out;
-
- error = xfs_iunlink_remove(tp, pag, ip);
+ error = xfs_inode_uninit(tp, pag, ip, &xic);
if (error)
goto out;
- /*
- * Free any local-format data sitting around before we reset the
- * data fork to extents format. Note that the attr fork data has
- * already been freed by xfs_attr_inactive.
- */
- if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- kfree(ip->i_df.if_data);
- ip->i_df.if_data = NULL;
- ip->i_df.if_bytes = 0;
- }
-
- VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
- ip->i_diflags = 0;
- ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
- ip->i_forkoff = 0; /* mark the attr fork not in use */
- ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
@@ -2652,13 +1874,6 @@ xfs_ifree(
iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
spin_unlock(&iip->ili_lock);
- /*
- * Bump the generation count so no one will be confused
- * by reincarnations of this inode.
- */
- VFS_I(ip)->i_generation++;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
if (xic.deleted)
error = xfs_ifree_cluster(tp, pag, ip, &xic);
out:
@@ -2742,13 +1957,17 @@ xfs_remove(
struct xfs_name *name,
struct xfs_inode *ip)
{
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = name,
+ .ip = ip,
+ };
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans *tp = NULL;
int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int dontcare;
int error = 0;
uint resblks;
- struct xfs_parent_args *ppargs;
trace_xfs_remove(dp, name);
@@ -2765,7 +1984,7 @@ xfs_remove(
if (error)
goto std_return;
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto std_return;
@@ -2788,76 +2007,10 @@ xfs_remove(
goto out_parent;
}
- /*
- * If we're removing a directory perform some additional validation.
- */
- if (is_dir) {
- ASSERT(VFS_I(ip)->i_nlink >= 2);
- if (VFS_I(ip)->i_nlink != 2) {
- error = -ENOTEMPTY;
- goto out_trans_cancel;
- }
- if (!xfs_dir_isempty(ip)) {
- error = -ENOTEMPTY;
- goto out_trans_cancel;
- }
-
- /* Drop the link from ip's "..". */
- error = xfs_droplink(tp, dp);
- if (error)
- goto out_trans_cancel;
-
- /* Drop the "." link from ip to self. */
- error = xfs_droplink(tp, ip);
- if (error)
- goto out_trans_cancel;
-
- /*
- * Point the unlinked child directory's ".." entry to the root
- * directory to eliminate back-references to inodes that may
- * get freed before the child directory is closed. If the fs
- * gets shrunk, this can lead to dirent inode validation errors.
- */
- if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
- error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
- tp->t_mountp->m_sb.sb_rootino, 0);
- if (error)
- goto out_trans_cancel;
- }
- } else {
- /*
- * When removing a non-directory we need to log the parent
- * inode here. For a directory this is done implicitly
- * by the xfs_droplink call for the ".." entry.
- */
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- }
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /* Drop the link from dp to ip. */
- error = xfs_droplink(tp, ip);
+ error = xfs_dir_remove_child(tp, resblks, &du);
if (error)
goto out_trans_cancel;
- error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
- if (error) {
- ASSERT(error != -ENOENT);
- goto out_trans_cancel;
- }
-
- /* Remove parent pointer. */
- if (ppargs) {
- error = xfs_parent_removename(tp, ppargs, dp, name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * Drop the link from dp to ip, and if ip was a directory, remove the
- * '.' and '..' references since we freed the directory.
- */
- xfs_dir_update_hook(dp, ip, -1, name);
-
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
@@ -2875,7 +2028,7 @@ xfs_remove(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -2884,7 +2037,7 @@ xfs_remove(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
std_return:
return error;
}
@@ -2964,160 +2117,6 @@ xfs_sort_inodes(
}
}
-static int
-xfs_finish_rename(
- struct xfs_trans *tp)
-{
- /*
- * If this is a synchronous mount, make sure that the rename transaction
- * goes to disk before returning to the user.
- */
- if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
- xfs_trans_set_sync(tp);
-
- return xfs_trans_commit(tp);
-}
-
-/*
- * xfs_cross_rename()
- *
- * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall
- */
-STATIC int
-xfs_cross_rename(
- struct xfs_trans *tp,
- struct xfs_inode *dp1,
- struct xfs_name *name1,
- struct xfs_inode *ip1,
- struct xfs_parent_args *ip1_ppargs,
- struct xfs_inode *dp2,
- struct xfs_name *name2,
- struct xfs_inode *ip2,
- struct xfs_parent_args *ip2_ppargs,
- int spaceres)
-{
- int error = 0;
- int ip1_flags = 0;
- int ip2_flags = 0;
- int dp2_flags = 0;
-
- /* Swap inode number for dirent in first parent */
- error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* Swap inode number for dirent in second parent */
- error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /*
- * If we're renaming one or more directories across different parents,
- * update the respective ".." entries (and link counts) to match the new
- * parents.
- */
- if (dp1 != dp2) {
- dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
-
- if (S_ISDIR(VFS_I(ip2)->i_mode)) {
- error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
- dp1->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* transfer ip2 ".." reference to dp1 */
- if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
- error = xfs_droplink(tp, dp2);
- if (error)
- goto out_trans_abort;
- xfs_bumplink(tp, dp1);
- }
-
- /*
- * Although ip1 isn't changed here, userspace needs
- * to be warned about the change, so that applications
- * relying on it (like backup ones), will properly
- * notify the change
- */
- ip1_flags |= XFS_ICHGTIME_CHG;
- ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- }
-
- if (S_ISDIR(VFS_I(ip1)->i_mode)) {
- error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
- dp2->i_ino, spaceres);
- if (error)
- goto out_trans_abort;
-
- /* transfer ip1 ".." reference to dp2 */
- if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
- error = xfs_droplink(tp, dp1);
- if (error)
- goto out_trans_abort;
- xfs_bumplink(tp, dp2);
- }
-
- /*
- * Although ip2 isn't changed here, userspace needs
- * to be warned about the change, so that applications
- * relying on it (like backup ones), will properly
- * notify the change
- */
- ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- ip2_flags |= XFS_ICHGTIME_CHG;
- }
- }
-
- /* Schedule parent pointer replacements */
- if (ip1_ppargs) {
- error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2,
- name2, ip1);
- if (error)
- goto out_trans_abort;
- }
-
- if (ip2_ppargs) {
- error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1,
- name1, ip2);
- if (error)
- goto out_trans_abort;
- }
-
- if (ip1_flags) {
- xfs_trans_ichgtime(tp, ip1, ip1_flags);
- xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
- }
- if (ip2_flags) {
- xfs_trans_ichgtime(tp, ip2, ip2_flags);
- xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
- }
- if (dp2_flags) {
- xfs_trans_ichgtime(tp, dp2, dp2_flags);
- xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
- }
- xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-
- /*
- * Inform our hook clients that we've finished an exchange operation as
- * follows: removed the source and target files from their directories;
- * added the target to the source directory; and added the source to
- * the target directory. All inodes are locked, so it's ok to model a
- * rename this way so long as we say we deleted entries before we add
- * new ones.
- */
- xfs_dir_update_hook(dp1, ip1, -1, name1);
- xfs_dir_update_hook(dp2, ip2, -1, name2);
- xfs_dir_update_hook(dp1, ip2, 1, name1);
- xfs_dir_update_hook(dp2, ip1, 1, name2);
-
- return xfs_finish_rename(tp);
-
-out_trans_abort:
- xfs_trans_cancel(tp);
- return error;
-}
-
/*
* xfs_rename_alloc_whiteout()
*
@@ -3133,12 +2132,17 @@ xfs_rename_alloc_whiteout(
struct xfs_inode *dp,
struct xfs_inode **wip)
{
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = dp,
+ .mode = S_IFCHR | WHITEOUT_MODE,
+ .flags = XFS_ICREATE_TMPFILE,
+ };
struct xfs_inode *tmpfile;
struct qstr name;
int error;
- error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE,
- xfs_has_parent(dp->i_mount), &tmpfile);
+ error = xfs_create_tmpfile(&args, &tmpfile);
if (error)
return error;
@@ -3178,13 +2182,20 @@ xfs_rename(
struct xfs_inode *target_ip,
unsigned int flags)
{
+ struct xfs_dir_update du_src = {
+ .dp = src_dp,
+ .name = src_name,
+ .ip = src_ip,
+ };
+ struct xfs_dir_update du_tgt = {
+ .dp = target_dp,
+ .name = target_name,
+ .ip = target_ip,
+ };
+ struct xfs_dir_update du_wip = { };
struct xfs_mount *mp = src_dp->i_mount;
struct xfs_trans *tp;
- struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
- struct xfs_parent_args *src_ppargs = NULL;
- struct xfs_parent_args *tgt_ppargs = NULL;
- struct xfs_parent_args *wip_ppargs = NULL;
int i;
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
@@ -3204,8 +2215,8 @@ xfs_rename(
* appropriately.
*/
if (flags & RENAME_WHITEOUT) {
- error = xfs_rename_alloc_whiteout(idmap, src_name,
- target_dp, &wip);
+ error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp,
+ &du_wip.ip);
if (error)
return error;
@@ -3213,21 +2224,21 @@ xfs_rename(
src_name->type = XFS_DIR3_FT_CHRDEV;
}
- xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
- inodes, &num_inodes);
+ xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, du_wip.ip,
+ inodes, &num_inodes);
- error = xfs_parent_start(mp, &src_ppargs);
+ error = xfs_parent_start(mp, &du_src.ppargs);
if (error)
goto out_release_wip;
- if (wip) {
- error = xfs_parent_start(mp, &wip_ppargs);
+ if (du_wip.ip) {
+ error = xfs_parent_start(mp, &du_wip.ppargs);
if (error)
goto out_src_ppargs;
}
if (target_ip) {
- error = xfs_parent_start(mp, &tgt_ppargs);
+ error = xfs_parent_start(mp, &du_tgt.ppargs);
if (error)
goto out_wip_ppargs;
}
@@ -3235,7 +2246,7 @@ xfs_rename(
retry:
nospace_error = 0;
spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
- target_name->len, wip != NULL);
+ target_name->len, du_wip.ip != NULL);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) {
nospace_error = error;
@@ -3250,7 +2261,7 @@ retry:
* We don't allow reservationless renaming when parent pointers are
* enabled because we can't back out if the xattrs must grow.
*/
- if (src_ppargs && nospace_error) {
+ if (du_src.ppargs && nospace_error) {
error = nospace_error;
xfs_trans_cancel(tp);
goto out_tgt_ppargs;
@@ -3282,8 +2293,8 @@ retry:
xfs_trans_ijoin(tp, src_ip, 0);
if (target_ip)
xfs_trans_ijoin(tp, target_ip, 0);
- if (wip)
- xfs_trans_ijoin(tp, wip, 0);
+ if (du_wip.ip)
+ xfs_trans_ijoin(tp, du_wip.ip, 0);
/*
* If we are using project inheritance, we only allow renames
@@ -3298,11 +2309,11 @@ retry:
/* RENAME_EXCHANGE is unique from here on. */
if (flags & RENAME_EXCHANGE) {
- error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
- src_ppargs, target_dp, target_name, target_ip,
- tgt_ppargs, spaceres);
- nospace_error = 0;
- goto out_unlock;
+ error = xfs_dir_exchange_children(tp, &du_src, &du_tgt,
+ spaceres);
+ if (error)
+ goto out_trans_cancel;
+ goto out_commit;
}
/*
@@ -3335,39 +2346,12 @@ retry:
* We don't allow quotaless renaming when parent pointers are enabled
* because we can't back out if the xattrs must grow.
*/
- if (src_ppargs && nospace_error) {
+ if (du_src.ppargs && nospace_error) {
error = nospace_error;
goto out_trans_cancel;
}
/*
- * Check for expected errors before we dirty the transaction
- * so we can return an error without a transaction abort.
- */
- if (target_ip == NULL) {
- /*
- * If there's no space reservation, check the entry will
- * fit before actually inserting it.
- */
- if (!spaceres) {
- error = xfs_dir_canenter(tp, target_dp, target_name);
- if (error)
- goto out_trans_cancel;
- }
- } else {
- /*
- * If target exists and it's a directory, check that whether
- * it can be destroyed.
- */
- if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
- (!xfs_dir_isempty(target_ip) ||
- (VFS_I(target_ip)->i_nlink > 2))) {
- error = -EEXIST;
- goto out_trans_cancel;
- }
- }
-
- /*
* Lock the AGI buffers we need to handle bumping the nlink of the
* whiteout inode off the unlinked list and to handle dropping the
* nlink of the target inode. Per locking order rules, do this in
@@ -3378,7 +2362,7 @@ retry:
* target_ip is either null or an empty directory.
*/
for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
- if (inodes[i] == wip ||
+ if (inodes[i] == du_wip.ip ||
(inodes[i] == target_ip &&
(VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
struct xfs_perag *pag;
@@ -3393,188 +2377,29 @@ retry:
}
}
- /*
- * Directory entry creation below may acquire the AGF. Remove
- * the whiteout from the unlinked list first to preserve correct
- * AGI/AGF locking order. This dirties the transaction so failures
- * after this point will abort and log recovery will clean up the
- * mess.
- *
- * For whiteouts, we need to bump the link count on the whiteout
- * inode. After this point, we have a real link, clear the tmpfile
- * state flag from the inode so it doesn't accidentally get misused
- * in future.
- */
- if (wip) {
- struct xfs_perag *pag;
-
- ASSERT(VFS_I(wip)->i_nlink == 0);
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino));
- error = xfs_iunlink_remove(tp, pag, wip);
- xfs_perag_put(pag);
- if (error)
- goto out_trans_cancel;
-
- xfs_bumplink(tp, wip);
- VFS_I(wip)->i_state &= ~I_LINKABLE;
- }
-
- /*
- * Set up the target.
- */
- if (target_ip == NULL) {
- /*
- * If target does not exist and the rename crosses
- * directories, adjust the target directory link count
- * to account for the ".." reference from the new entry.
- */
- error = xfs_dir_createname(tp, target_dp, target_name,
- src_ip->i_ino, spaceres);
- if (error)
- goto out_trans_cancel;
-
- xfs_trans_ichgtime(tp, target_dp,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- if (new_parent && src_is_directory) {
- xfs_bumplink(tp, target_dp);
- }
- } else { /* target_ip != NULL */
- /*
- * Link the source inode under the target name.
- * If the source inode is a directory and we are moving
- * it across directories, its ".." entry will be
- * inconsistent until we replace that down below.
- *
- * In case there is already an entry with the same
- * name at the destination directory, remove it first.
- */
- error = xfs_dir_replace(tp, target_dp, target_name,
- src_ip->i_ino, spaceres);
- if (error)
- goto out_trans_cancel;
-
- xfs_trans_ichgtime(tp, target_dp,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /*
- * Decrement the link count on the target since the target
- * dir no longer points to it.
- */
- error = xfs_droplink(tp, target_ip);
- if (error)
- goto out_trans_cancel;
-
- if (src_is_directory) {
- /*
- * Drop the link from the old "." entry.
- */
- error = xfs_droplink(tp, target_ip);
- if (error)
- goto out_trans_cancel;
- }
- } /* target_ip != NULL */
-
- /*
- * Remove the source.
- */
- if (new_parent && src_is_directory) {
- /*
- * Rewrite the ".." entry to point to the new
- * directory.
- */
- error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
- target_dp->i_ino, spaceres);
- ASSERT(error != -EEXIST);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * We always want to hit the ctime on the source inode.
- *
- * This isn't strictly required by the standards since the source
- * inode isn't really being changed, but old unix file systems did
- * it and some incremental backup programs won't work without it.
- */
- xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
-
- /*
- * Adjust the link count on src_dp. This is necessary when
- * renaming a directory, either within one parent when
- * the target existed, or across two parent directories.
- */
- if (src_is_directory && (new_parent || target_ip != NULL)) {
-
- /*
- * Decrement link count on src_directory since the
- * entry that's moved no longer points to it.
- */
- error = xfs_droplink(tp, src_dp);
- if (error)
- goto out_trans_cancel;
- }
-
- /*
- * For whiteouts, we only need to update the source dirent with the
- * inode number of the whiteout inode rather than removing it
- * altogether.
- */
- if (wip)
- error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
- spaceres);
- else
- error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
- spaceres);
-
+ error = xfs_dir_rename_children(tp, &du_src, &du_tgt, spaceres,
+ &du_wip);
if (error)
goto out_trans_cancel;
- /* Schedule parent pointer updates. */
- if (wip_ppargs) {
- error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name,
- wip);
- if (error)
- goto out_trans_cancel;
- }
-
- if (src_ppargs) {
- error = xfs_parent_replacename(tp, src_ppargs, src_dp,
- src_name, target_dp, target_name, src_ip);
- if (error)
- goto out_trans_cancel;
- }
-
- if (tgt_ppargs) {
- error = xfs_parent_removename(tp, tgt_ppargs, target_dp,
- target_name, target_ip);
- if (error)
- goto out_trans_cancel;
+ if (du_wip.ip) {
+ /*
+ * Now we have a real link, clear the "I'm a tmpfile" state
+ * flag from the inode so it doesn't accidentally get misused in
+ * future.
+ */
+ VFS_I(du_wip.ip)->i_state &= ~I_LINKABLE;
}
- xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
- if (new_parent)
- xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
-
+out_commit:
/*
- * Inform our hook clients that we've finished a rename operation as
- * follows: removed the source and target files from their directories;
- * that we've added the source to the target directory; and finally
- * that we've added the whiteout, if there was one. All inodes are
- * locked, so it's ok to model a rename this way so long as we say we
- * deleted entries before we add new ones.
+ * If this is a synchronous mount, make sure that the rename
+ * transaction goes to disk before returning to the user.
*/
- if (target_ip)
- xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
- xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
- xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
- if (wip)
- xfs_dir_update_hook(src_dp, wip, 1, src_name);
+ if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
+ xfs_trans_set_sync(tp);
- error = xfs_finish_rename(tp);
+ error = xfs_trans_commit(tp);
nospace_error = 0;
goto out_unlock;
@@ -3583,14 +2408,14 @@ out_trans_cancel:
out_unlock:
xfs_iunlock_rename(inodes, num_inodes);
out_tgt_ppargs:
- xfs_parent_finish(mp, tgt_ppargs);
+ xfs_parent_finish(mp, du_tgt.ppargs);
out_wip_ppargs:
- xfs_parent_finish(mp, wip_ppargs);
+ xfs_parent_finish(mp, du_wip.ppargs);
out_src_ppargs:
- xfs_parent_finish(mp, src_ppargs);
+ xfs_parent_finish(mp, du_src.ppargs);
out_release_wip:
- if (wip)
- xfs_irele(wip);
+ if (du_wip.ip)
+ xfs_irele(du_wip.ip);
if (error == -ENOSPC && nospace_error)
error = nospace_error;
return error;
@@ -4293,3 +3118,11 @@ xfs_inode_alloc_unitsize(
return XFS_FSB_TO_B(ip->i_mount, blocks);
}
+
+/* Should we always be using copy on write for file writes? */
+bool
+xfs_is_always_cow_inode(
+ struct xfs_inode *ip)
+{
+ return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 292b90b5f2ac..51defdebef30 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -8,6 +8,7 @@
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
+#include "xfs_inode_util.h"
/*
* Kernel only inode definitions
@@ -270,15 +271,6 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
return ret;
}
-static inline prid_t
-xfs_get_initial_prid(struct xfs_inode *dp)
-{
- if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
- return dp->i_projid;
-
- return XFS_PROJID_DEFAULT;
-}
-
static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
@@ -292,6 +284,13 @@ static inline bool xfs_is_metadata_inode(struct xfs_inode *ip)
xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
}
+bool xfs_is_always_cow_inode(struct xfs_inode *ip);
+
+static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
+{
+ return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
+}
+
/*
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
@@ -517,12 +516,9 @@ int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct mnt_idmap *idmap,
- struct xfs_inode *dp, struct xfs_name *name,
- umode_t mode, dev_t rdev, bool need_xattr,
- struct xfs_inode **ipp);
-int xfs_create_tmpfile(struct mnt_idmap *idmap,
- struct xfs_inode *dp, umode_t mode, bool init_xattrs,
+int xfs_create(const struct xfs_icreate_args *iargs,
+ struct xfs_name *name, struct xfs_inode **ipp);
+int xfs_create_tmpfile(const struct xfs_icreate_args *iargs,
struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
@@ -542,7 +538,6 @@ void xfs_assert_ilocked(struct xfs_inode *, uint);
uint xfs_ilock_data_map_shared(struct xfs_inode *);
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
-uint xfs_ip2xflags(struct xfs_inode *);
int xfs_ifree(struct xfs_trans *, struct xfs_inode *);
int xfs_itruncate_extents_flags(struct xfs_trans **,
struct xfs_inode *, int, xfs_fsize_t, int);
@@ -556,13 +551,8 @@ int xfs_iflush_cluster(struct xfs_buf *);
void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
struct xfs_inode *ip1, uint ip1_mode);
-xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
-xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
-
-int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp,
- struct xfs_inode *pip, xfs_ino_t ino, umode_t mode,
- xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs,
- struct xfs_inode **ipp);
+int xfs_icreate(struct xfs_trans *tp, xfs_ino_t ino,
+ const struct xfs_icreate_args *args, struct xfs_inode **ipp);
static inline int
xfs_itruncate_extents(
@@ -616,18 +606,15 @@ extern struct kmem_cache *xfs_inode_cache;
bool xfs_inode_needs_inactive(struct xfs_inode *ip);
-int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
-int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
- struct xfs_inode *ip);
struct xfs_inode *xfs_iunlink_lookup(struct xfs_perag *pag, xfs_agino_t agino);
+int xfs_iunlink_reload_next(struct xfs_trans *tp, struct xfs_buf *agibp,
+ xfs_agino_t prev_agino, xfs_agino_t next_agino);
void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2);
-int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip);
-void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode);
void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes);
@@ -645,29 +632,8 @@ void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_filblks_t *dblocks, xfs_filblks_t *rblocks);
unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip);
-struct xfs_dir_update_params {
- const struct xfs_inode *dp;
- const struct xfs_inode *ip;
- const struct xfs_name *name;
- int delta;
-};
-
-#ifdef CONFIG_XFS_LIVE_HOOKS
-void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
- int delta, const struct xfs_name *name);
-
-struct xfs_dir_hook {
- struct xfs_hook dirent_hook;
-};
-
-void xfs_dir_hook_disable(void);
-void xfs_dir_hook_enable(void);
-
-int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
-void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
-void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
-#else
-# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0)
-#endif /* CONFIG_XFS_LIVE_HOOKS */
+int xfs_icreate_dqalloc(const struct xfs_icreate_args *args,
+ struct xfs_dquot **udqpp, struct xfs_dquot **gdqpp,
+ struct xfs_dquot **pdqpp);
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f28d653300d1..ef05cbbe116c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -37,6 +37,36 @@ xfs_inode_item_sort(
return INODE_ITEM(lip)->ili_inode->i_ino;
}
+#ifdef DEBUG_EXPENSIVE
+static void
+xfs_inode_item_precommit_check(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dinode *dip;
+ xfs_failaddr_t fa;
+
+ dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS);
+ if (!dip) {
+ ASSERT(dip != NULL);
+ return;
+ }
+
+ xfs_inode_to_disk(ip, dip, 0);
+ xfs_dinode_calc_crc(mp, dip);
+ fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+ kfree(dip);
+}
+#else
+# define xfs_inode_item_precommit_check(ip) ((void)0)
+#endif
+
/*
* Prior to finally logging the inode, we have to ensure that all the
* per-modification inode state changes are applied. This includes VFS inode
@@ -169,6 +199,8 @@ xfs_inode_item_precommit(
iip->ili_fields |= (flags | iip->ili_last_fields);
spin_unlock(&iip->ili_lock);
+ xfs_inode_item_precommit_check(ip);
+
/*
* We are done with the log item transaction dirty state, so clear it so
* that it doesn't pollute future transactions.
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f0117188f302..4e933db75b12 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -469,66 +469,6 @@ xfs_fileattr_get(
return 0;
}
-STATIC uint16_t
-xfs_flags2diflags(
- struct xfs_inode *ip,
- unsigned int xflags)
-{
- /* can't set PREALLOC this way, just preserve it */
- uint16_t di_flags =
- (ip->i_diflags & XFS_DIFLAG_PREALLOC);
-
- if (xflags & FS_XFLAG_IMMUTABLE)
- di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & FS_XFLAG_APPEND)
- di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & FS_XFLAG_SYNC)
- di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & FS_XFLAG_NOATIME)
- di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & FS_XFLAG_NODUMP)
- di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & FS_XFLAG_NODEFRAG)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & FS_XFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
- if (S_ISDIR(VFS_I(ip)->i_mode)) {
- if (xflags & FS_XFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & FS_XFLAG_NOSYMLINKS)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & FS_XFLAG_EXTSZINHERIT)
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & FS_XFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(VFS_I(ip)->i_mode)) {
- if (xflags & FS_XFLAG_REALTIME)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & FS_XFLAG_EXTSIZE)
- di_flags |= XFS_DIFLAG_EXTSIZE;
- }
-
- return di_flags;
-}
-
-STATIC uint64_t
-xfs_flags2diflags2(
- struct xfs_inode *ip,
- unsigned int xflags)
-{
- uint64_t di_flags2 =
- (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
- XFS_DIFLAG2_BIGTIME |
- XFS_DIFLAG2_NREXT64));
-
- if (xflags & FS_XFLAG_DAX)
- di_flags2 |= XFS_DIFLAG2_DAX;
- if (xflags & FS_XFLAG_COWEXTSIZE)
- di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
-
- return di_flags2;
-}
-
static int
xfs_ioctl_setattr_xflags(
struct xfs_trans *tp,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff222827e550..07f736c42460 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -26,6 +26,7 @@
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
#include "xfs_file.h"
+#include "xfs_bmap.h"
#include <linux/posix_acl.h>
#include <linux/security.h>
@@ -157,8 +158,6 @@ xfs_create_need_xattr(
if (dir->i_sb->s_security)
return true;
#endif
- if (xfs_has_parent(XFS_I(dir)->i_mount))
- return true;
return false;
}
@@ -172,49 +171,55 @@ xfs_generic_create(
dev_t rdev,
struct file *tmpfile) /* unnamed file */
{
- struct inode *inode;
- struct xfs_inode *ip = NULL;
- struct posix_acl *default_acl, *acl;
- struct xfs_name name;
- int error;
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = XFS_I(dir),
+ .rdev = rdev,
+ .mode = mode,
+ };
+ struct inode *inode;
+ struct xfs_inode *ip = NULL;
+ struct posix_acl *default_acl, *acl;
+ struct xfs_name name;
+ int error;
/*
* Irix uses Missed'em'V split, but doesn't want to see
* the upper 5 bits of (14bit) major.
*/
- if (S_ISCHR(mode) || S_ISBLK(mode)) {
- if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+ if (S_ISCHR(args.mode) || S_ISBLK(args.mode)) {
+ if (unlikely(!sysv_valid_dev(args.rdev) ||
+ MAJOR(args.rdev) & ~0x1ff))
return -EINVAL;
} else {
- rdev = 0;
+ args.rdev = 0;
}
- error = posix_acl_create(dir, &mode, &default_acl, &acl);
+ error = posix_acl_create(dir, &args.mode, &default_acl, &acl);
if (error)
return error;
/* Verify mode is valid also for tmpfile case */
- error = xfs_dentry_mode_to_name(&name, dentry, mode);
+ error = xfs_dentry_mode_to_name(&name, dentry, args.mode);
if (unlikely(error))
goto out_free_acl;
if (!tmpfile) {
- error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev,
- xfs_create_need_xattr(dir, default_acl, acl),
- &ip);
+ if (xfs_create_need_xattr(dir, default_acl, acl))
+ args.flags |= XFS_ICREATE_INIT_XATTRS;
+
+ error = xfs_create(&args, &name, &ip);
} else {
- bool init_xattrs = false;
+ args.flags |= XFS_ICREATE_TMPFILE;
/*
- * If this temporary file will be linkable, set up the file
- * with an attr fork to receive a parent pointer.
+ * If this temporary file will not be linkable, don't bother
+ * creating an attr fork to receive a parent pointer.
*/
- if (!(tmpfile->f_flags & O_EXCL) &&
- xfs_has_parent(XFS_I(dir)->i_mount))
- init_xattrs = true;
+ if (tmpfile->f_flags & O_EXCL)
+ args.flags |= XFS_ICREATE_UNLINKABLE;
- error = xfs_create_tmpfile(idmap, XFS_I(dir), mode,
- init_xattrs, &ip);
+ error = xfs_create_tmpfile(&args, &ip);
}
if (unlikely(error))
goto out_free_acl;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ac355328121a..54a098fe7285 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -135,8 +135,6 @@ typedef __u32 xfs_nlink_t;
*/
#define __this_address ({ __label__ __here; __here: barrier(); &&__here; })
-#define XFS_PROJID_DEFAULT 0
-
#define howmany(x, y) (((x)+((y)-1))/(y))
static inline void delay(long ticks)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 47120b745c47..9490b913a4ab 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -793,12 +793,15 @@ xfs_qm_qino_alloc(
return error;
if (need_alloc) {
+ struct xfs_icreate_args args = {
+ .mode = S_IFREG,
+ .flags = XFS_ICREATE_UNLINKABLE,
+ };
xfs_ino_t ino;
error = xfs_dialloc(&tp, 0, S_IFREG, &ino);
if (!error)
- error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino,
- S_IFREG, 1, 0, 0, false, ipp);
+ error = xfs_icreate(tp, ino, &args, ipp);
if (error) {
xfs_trans_cancel(tp);
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 65c5dfe17ecf..fb55e4ce49fa 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -6,16 +6,6 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
-static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip)
-{
- return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
-}
-
-static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
-{
- return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
-}
-
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared);
int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 17aee806ec2e..77f19e2f66e0 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -90,19 +90,25 @@ xfs_symlink(
struct xfs_inode **ipp)
{
struct xfs_mount *mp = dp->i_mount;
+ struct xfs_icreate_args args = {
+ .idmap = idmap,
+ .pip = dp,
+ .mode = S_IFLNK | (mode & ~S_IFMT),
+ };
+ struct xfs_dir_update du = {
+ .dp = dp,
+ .name = link_name,
+ };
struct xfs_trans *tp = NULL;
- struct xfs_inode *ip = NULL;
int error = 0;
int pathlen;
bool unlock_dp_on_error = false;
xfs_filblks_t fs_blocks;
- prid_t prid;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
+ struct xfs_dquot *udqp;
+ struct xfs_dquot *gdqp;
+ struct xfs_dquot *pdqp;
uint resblks;
xfs_ino_t ino;
- struct xfs_parent_args *ppargs;
*ipp = NULL;
@@ -119,15 +125,8 @@ xfs_symlink(
return -ENAMETOOLONG;
ASSERT(pathlen > 0);
- prid = xfs_get_initial_prid(dp);
-
- /*
- * Make sure that we have allocated dquot(s) on disk.
- */
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
- mapped_fsgid(idmap, &init_user_ns), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ /* Make sure that we have allocated dquot(s) on disk. */
+ error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -143,7 +142,7 @@ xfs_symlink(
fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks);
- error = xfs_parent_start(mp, &ppargs);
+ error = xfs_parent_start(mp, &du.ppargs);
if (error)
goto out_release_dquots;
@@ -168,9 +167,7 @@ xfs_symlink(
*/
error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino);
if (!error)
- error = xfs_init_new_inode(idmap, tp, dp, ino,
- S_IFLNK | (mode & ~S_IFMT), 1, 0, prid,
- xfs_has_parent(mp), &ip);
+ error = xfs_icreate(tp, ino, &args, &du.ip);
if (error)
goto out_trans_cancel;
@@ -186,33 +183,22 @@ xfs_symlink(
/*
* Also attach the dquot(s) to it, if applicable.
*/
- xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+ xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
resblks -= XFS_IALLOC_SPACE_RES(mp);
- error = xfs_symlink_write_target(tp, ip, ip->i_ino, target_path,
+ error = xfs_symlink_write_target(tp, du.ip, du.ip->i_ino, target_path,
pathlen, fs_blocks, resblks);
if (error)
goto out_trans_cancel;
resblks -= fs_blocks;
- i_size_write(VFS_I(ip), ip->i_disk_size);
+ i_size_write(VFS_I(du.ip), du.ip->i_disk_size);
/*
* Create the directory entry for the symlink.
*/
- error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks);
+ error = xfs_dir_create_child(tp, resblks, &du);
if (error)
goto out_trans_cancel;
- xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
- /* Add parent pointer for the new symlink. */
- if (ppargs) {
- error = xfs_parent_addname(tp, ppargs, dp, link_name, ip);
- if (error)
- goto out_trans_cancel;
- }
-
- xfs_dir_update_hook(dp, ip, 1, link_name);
/*
* If this is a synchronous mount, make sure that the
@@ -230,10 +216,10 @@ xfs_symlink(
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
- *ipp = ip;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ *ipp = du.ip;
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
return 0;
out_trans_cancel:
@@ -244,13 +230,13 @@ out_release_inode:
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_finish_inode_setup(ip);
- xfs_irele(ip);
+ if (du.ip) {
+ xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
+ xfs_finish_inode_setup(du.ip);
+ xfs_irele(du.ip);
}
out_parent:
- xfs_parent_finish(mp, ppargs);
+ xfs_parent_finish(mp, du.ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1636663707dc..f97e8c68641f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -224,7 +224,6 @@ void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
-void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
uint);