aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChandan Babu R <[email protected]>2024-04-16 12:42:17 +0530
committerChandan Babu R <[email protected]>2024-04-16 12:42:17 +0530
commit9ba8e658d867be96f9da58f060deff1e9cbca1a9 (patch)
treef6ccd2cd5461c1760cb13f7bf3bff9087d080e54
parent1eef01250de4d2c6f779d3bc515bf7b7f3644f3b (diff)
parent1a5f6e08d4e379a23da5be974aee50b26a20c5b0 (diff)
Merge tag 'inode-repair-improvements-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeA
xfs: inode-related repair fixes While doing QA of the online fsck code, I made a few observations: First, nobody was checking that the di_onlink field is actually zero; Second, that allocating a temporary file for repairs can fail (and thus bring down the entire fs) if the inode cluster is corrupt; and Third, that file link counts do not pin at ~0U to prevent integer overflows. Fourth, the x{chk,rep}_metadata_inode_fork functions should be subclassing the main scrub context, not modifying the parent's setup willy-nilly. This scattered patchset fixes those three problems. Signed-off-by: Darrick J. Wong <[email protected]> Signed-off-by: Chandan Babu R <[email protected]> * tag 'inode-repair-improvements-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype xfs: pin inodes that would otherwise overflow link count xfs: try to avoid allocating from sick inode clusters xfs: check unused nlink fields in the ondisk inode
-rw-r--r--fs/xfs/libxfs/xfs_format.h6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c40
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c8
-rw-r--r--fs/xfs/scrub/common.c23
-rw-r--r--fs/xfs/scrub/dir_repair.c11
-rw-r--r--fs/xfs/scrub/inode_repair.c12
-rw-r--r--fs/xfs/scrub/nlinks.c4
-rw-r--r--fs/xfs/scrub/nlinks_repair.c8
-rw-r--r--fs/xfs/scrub/repair.c67
-rw-r--r--fs/xfs/scrub/scrub.c63
-rw-r--r--fs/xfs/scrub/scrub.h11
-rw-r--r--fs/xfs/xfs_inode.c33
12 files changed, 187 insertions, 99 deletions
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 10153ce116d4..f1818c54af6f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -900,6 +900,12 @@ static inline uint xfs_dinode_size(int version)
#define XFS_MAXLINK ((1U << 31) - 1U)
/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
+/*
* Values for di_format
*
* This enum is used in string mapping in xfs_trace.h; please keep the
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index cb37f0007731..14c81f227c5b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1058,6 +1058,33 @@ xfs_inobt_first_free_inode(
}
/*
+ * If this AG has corrupt inodes, check if allocating this inode would fail
+ * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
+ * somewhere else.
+ */
+static int
+xfs_dialloc_check_ino(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ xfs_ino_t ino)
+{
+ struct xfs_imap imap;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xfs_imap(pag, tp, ino, &imap, 0);
+ if (error)
+ return -EAGAIN;
+
+ error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
+ if (error)
+ return -EAGAIN;
+
+ xfs_trans_brelse(tp, bp);
+ return 0;
+}
+
+/*
* Allocate an inode using the inobt-only algorithm.
*/
STATIC int
@@ -1309,6 +1336,13 @@ alloc_inode:
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error0;
+ }
+
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
error = xfs_inobt_update(cur, &rec);
@@ -1584,6 +1618,12 @@ xfs_dialloc_ag(
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error_cur;
+ }
+
/*
* Modify or remove the finobt record.
*/
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d0dcce462bf4..d79002343d0b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -491,6 +491,14 @@ xfs_dinode_verify(
return __this_address;
}
+ if (dip->di_version > 1) {
+ if (dip->di_onlink)
+ return __this_address;
+ } else {
+ if (dip->di_nlink)
+ return __this_address;
+ }
+
/* don't allow invalid i_size */
di_size = be64_to_cpu(dip->di_size);
if (di_size & (1ULL << 63))
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index a2da2bef509a..48302532d10d 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1203,27 +1203,12 @@ xchk_metadata_inode_subtype(
struct xfs_scrub *sc,
unsigned int scrub_type)
{
- __u32 smtype = sc->sm->sm_type;
- unsigned int sick_mask = sc->sick_mask;
+ struct xfs_scrub_subord *sub;
int error;
- sc->sm->sm_type = scrub_type;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- break;
- }
-
- sc->sick_mask = sick_mask;
- sc->sm->sm_type = smtype;
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
+ xchk_scrub_free_subord(sub);
return error;
}
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index c150b2efa2c2..38957da26b94 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1145,7 +1145,9 @@ xrep_dir_set_nlink(
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *dp = sc->ip;
struct xfs_perag *pag;
- unsigned int new_nlink = rd->subdirs + 2;
+ unsigned int new_nlink = min_t(unsigned long long,
+ rd->subdirs + 2,
+ XFS_NLINK_PINNED);
int error;
/*
@@ -1202,13 +1204,6 @@ xrep_dir_swap(
int error = 0;
/*
- * If we found enough subdirs to overflow this directory's link count,
- * bail out to userspace before we modify anything.
- */
- if (rd->subdirs + 2 > XFS_MAXLINK)
- return -EFSCORRUPTED;
-
- /*
* If we never found the parent for this directory, temporarily assign
* the root dir as the parent; we'll move this to the orphanage after
* exchanging the dir contents. We hold the ILOCK of the dir being
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 0dde5df2f8d3..e3b74ea50fde 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -516,6 +516,17 @@ xrep_dinode_mode(
return 0;
}
+/* Fix unused link count fields having nonzero values. */
+STATIC void
+xrep_dinode_nlinks(
+ struct xfs_dinode *dip)
+{
+ if (dip->di_version > 1)
+ dip->di_onlink = 0;
+ else
+ dip->di_nlink = 0;
+}
+
/* Fix any conflicting flags that the verifiers complain about. */
STATIC void
xrep_dinode_flags(
@@ -1377,6 +1388,7 @@ xrep_dinode_core(
iget_error = xrep_dinode_mode(ri, dip);
if (iget_error)
goto write;
+ xrep_dinode_nlinks(dip);
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
xrep_dinode_size(ri, dip);
xrep_dinode_extsize_hints(sc, dip);
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index c456523fac9c..fcb9c473f372 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -607,9 +607,11 @@ xchk_nlinks_compare_inode(
* this as a corruption. The VFS won't let users increase the link
* count, but it will let them decrease it.
*/
- if (total_links > XFS_MAXLINK) {
+ if (total_links > XFS_NLINK_PINNED) {
xchk_ino_set_corrupt(sc, ip->i_ino);
goto out_corrupt;
+ } else if (total_links > XFS_MAXLINK) {
+ xchk_ino_set_warning(sc, ip->i_ino);
}
/* Link counts should match. */
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 0cb67339eac8..83f8637bb08f 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -238,14 +238,10 @@ xrep_nlinks_repair_inode(
/* Commit the new link count if it changed. */
if (total_links != actual_nlink) {
- if (total_links > XFS_MAXLINK) {
- trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
- goto out_trans;
- }
-
trace_xrep_nlinks_update_inode(mp, ip, &obs);
- set_nlink(VFS_I(ip), total_links);
+ set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
+ XFS_NLINK_PINNED));
dirty = true;
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 369f0430e4ba..b6aff89679d5 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1009,55 +1009,27 @@ xrep_metadata_inode_subtype(
struct xfs_scrub *sc,
unsigned int scrub_type)
{
- __u32 smtype = sc->sm->sm_type;
- __u32 smflags = sc->sm->sm_flags;
- unsigned int sick_mask = sc->sick_mask;
+ struct xfs_scrub_subord *sub;
int error;
/*
- * Let's see if the inode needs repair. We're going to open-code calls
- * to the scrub and repair functions so that we can hang on to the
+ * Let's see if the inode needs repair. Use a subordinate scrub context
+ * to call the scrub and repair functions so that we can hang on to the
* resources that we already acquired instead of using the standard
* setup/teardown routines.
*/
- sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
- sc->sm->sm_type = scrub_type;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xchk_bmap_attr(sc);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- }
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
if (error)
goto out;
-
- if (!xrep_will_attempt(sc))
+ if (!xrep_will_attempt(&sub->sc))
goto out;
/*
* Repair some part of the inode. This will potentially join the inode
* to the transaction.
*/
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xrep_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xrep_bmap(sc, XFS_DATA_FORK, false);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xrep_bmap(sc, XFS_ATTR_FORK, false);
- break;
- }
+ error = sub->sc.ops->repair(&sub->sc);
if (error)
goto out;
@@ -1066,10 +1038,10 @@ xrep_metadata_inode_subtype(
* that the inode will not be joined to the transaction when we exit
* the function.
*/
- error = xfs_defer_finish(&sc->tp);
+ error = xfs_defer_finish(&sub->sc.tp);
if (error)
goto out;
- error = xfs_trans_roll(&sc->tp);
+ error = xfs_trans_roll(&sub->sc.tp);
if (error)
goto out;
@@ -1077,31 +1049,18 @@ xrep_metadata_inode_subtype(
* Clear the corruption flags and re-check the metadata that we just
* repaired.
*/
- sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xchk_bmap_attr(sc);
- break;
- }
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = sub->sc.ops->scrub(&sub->sc);
if (error)
goto out;
/* If corruption persists, the repair has failed. */
- if (xchk_needs_repair(sc->sm)) {
+ if (xchk_needs_repair(sub->sc.sm)) {
error = -EFSCORRUPTED;
goto out;
}
out:
- sc->sick_mask = sick_mask;
- sc->sm->sm_type = smtype;
- sc->sm->sm_flags = smflags;
+ xchk_scrub_free_subord(sub);
return error;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 301d5b753fdd..ebb06838c31b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -177,6 +177,39 @@ xchk_fsgates_disable(
}
#undef FSGATES_MASK
+/* Free the resources associated with a scrub subtype. */
+void
+xchk_scrub_free_subord(
+ struct xfs_scrub_subord *sub)
+{
+ struct xfs_scrub *sc = sub->parent_sc;
+
+ ASSERT(sc->ip == sub->sc.ip);
+ ASSERT(sc->orphanage == sub->sc.orphanage);
+ ASSERT(sc->tempip == sub->sc.tempip);
+
+ sc->sm->sm_type = sub->old_smtype;
+ sc->sm->sm_flags = sub->old_smflags |
+ (sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT);
+ sc->tp = sub->sc.tp;
+
+ if (sub->sc.buf) {
+ if (sub->sc.buf_cleanup)
+ sub->sc.buf_cleanup(sub->sc.buf);
+ kvfree(sub->sc.buf);
+ }
+ if (sub->sc.xmbtp)
+ xmbuf_free(sub->sc.xmbtp);
+ if (sub->sc.xfile)
+ xfile_destroy(sub->sc.xfile);
+
+ sc->ilock_flags = sub->sc.ilock_flags;
+ sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags;
+ sc->temp_ilock_flags = sub->sc.temp_ilock_flags;
+
+ kfree(sub);
+}
+
/* Free all the resources and finish the transactions. */
STATIC int
xchk_teardown(
@@ -505,6 +538,36 @@ static inline void xchk_postmortem(struct xfs_scrub *sc)
}
#endif /* CONFIG_XFS_ONLINE_REPAIR */
+/*
+ * Create a new scrub context from an existing one, but with a different scrub
+ * type.
+ */
+struct xfs_scrub_subord *
+xchk_scrub_create_subord(
+ struct xfs_scrub *sc,
+ unsigned int subtype)
+{
+ struct xfs_scrub_subord *sub;
+
+ sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
+ if (!sub)
+ return ERR_PTR(-ENOMEM);
+
+ sub->old_smtype = sc->sm->sm_type;
+ sub->old_smflags = sc->sm->sm_flags;
+ sub->parent_sc = sc;
+ memcpy(&sub->sc, sc, sizeof(struct xfs_scrub));
+ sub->sc.ops = &meta_scrub_ops[subtype];
+ sub->sc.sm->sm_type = subtype;
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ sub->sc.buf = NULL;
+ sub->sc.buf_cleanup = NULL;
+ sub->sc.xfile = NULL;
+ sub->sc.xmbtp = NULL;
+
+ return sub;
+}
+
/* Dispatch metadata scrubbing. */
int
xfs_scrub_metadata(
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 7abe498f7a46..54a4242bc79c 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -156,6 +156,17 @@ struct xfs_scrub {
*/
#define XREP_FSGATES_ALL (XREP_FSGATES_EXCHANGE_RANGE)
+struct xfs_scrub_subord {
+ struct xfs_scrub sc;
+ struct xfs_scrub *parent_sc;
+ unsigned int old_smtype;
+ unsigned int old_smflags;
+};
+
+struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
+ unsigned int subtype);
+void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
+
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fed0cd6bffdf..03dcb4ac0431 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -890,22 +890,25 @@ xfs_init_new_inode(
*/
static int /* error */
xfs_droplink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- if (VFS_I(ip)->i_nlink == 0) {
- xfs_alert(ip->i_mount,
- "%s: Attempt to drop inode (%llu) with nlink zero.",
- __func__, ip->i_ino);
- return -EFSCORRUPTED;
- }
+ struct inode *inode = VFS_I(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- drop_nlink(VFS_I(ip));
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (VFS_I(ip)->i_nlink)
+ if (inode->i_nlink)
return 0;
return xfs_iunlink(tp, ip);
@@ -919,9 +922,17 @@ xfs_bumplink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
+ struct inode *inode = VFS_I(ip);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- inc_nlink(VFS_I(ip));
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}