diff options
34 files changed, 2185 insertions, 85 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 7e1df6fdaaad..561ab59b9422 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -184,6 +184,7 @@ xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ ialloc_repair.o \ + inode_repair.o \ newbt.o \ reap.o \ refcount_repair.o \ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 654e17e6610d..5d1ab4978f32 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1040,23 +1040,16 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } -/* Verify the consistency of an inline attribute fork. */ +/* Verify the consistency of a raw inline attribute fork. */ xfs_failaddr_t xfs_attr_shortform_verify( - struct xfs_inode *ip) + struct xfs_attr_shortform *sfp, + size_t size) { - struct xfs_attr_shortform *sfp; struct xfs_attr_sf_entry *sfep; struct xfs_attr_sf_entry *next_sfep; char *endp; - struct xfs_ifork *ifp; int i; - int64_t size; - - ASSERT(ip->i_af.if_format == XFS_DINODE_FMT_LOCAL); - ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); - sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - size = ifp->if_bytes; /* * Give up if the attribute is way too short. 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 368f4d9fa1d5..ce6743463c86 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -56,7 +56,8 @@ int xfs_attr_sf_findname(struct xfs_da_args *args, unsigned int *basep); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); -xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_attr_shortform *sfp, + size_t size); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index e308d2f44a3c..a073ca877ced 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6168,19 +6168,18 @@ xfs_bmap_finish_one( return error; } -/* Check that an inode's extent does not have invalid flags or bad ranges. */ +/* Check that an extent does not have invalid flags or bad ranges. */ xfs_failaddr_t -xfs_bmap_validate_extent( - struct xfs_inode *ip, +xfs_bmap_validate_extent_raw( + struct xfs_mount *mp, + bool rtfile, int whichfork, struct xfs_bmbt_irec *irec) { - struct xfs_mount *mp = ip->i_mount; - if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) return __this_address; - if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { + if (rtfile && whichfork == XFS_DATA_FORK) { if (!xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) return __this_address; @@ -6210,3 +6209,14 @@ xfs_bmap_intent_destroy_cache(void) kmem_cache_destroy(xfs_bmap_intent_cache); xfs_bmap_intent_cache = NULL; } + +/* Check that an inode's extent does not have invalid flags or bad ranges. 
*/ +xfs_failaddr_t +xfs_bmap_validate_extent( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *irec) +{ + return xfs_bmap_validate_extent_raw(ip->i_mount, + XFS_IS_REALTIME_INODE(ip), whichfork, irec); +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e33470e39728..8518324db285 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -263,6 +263,8 @@ static inline uint32_t xfs_bmap_fork_to_state(int whichfork) } } +xfs_failaddr_t xfs_bmap_validate_extent_raw(struct xfs_mount *mp, bool rtfile, + int whichfork, struct xfs_bmbt_irec *irec); xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *irec); int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 7404a9ff1a92..1db2e60ba827 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -175,7 +175,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, int64_t size); int xfs_dir2_sf_entsize(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, int len); void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 8cd37e6e9d38..870ef1d1ebe4 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -707,11 +707,10 @@ xfs_dir2_sf_check( /* Verify the consistency of an inline directory. 
*/ xfs_failaddr_t xfs_dir2_sf_verify( - struct xfs_inode *ip) + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int64_t size) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); - struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; @@ -719,15 +718,9 @@ xfs_dir2_sf_verify( int i; int i8count; int offset; - int64_t size; int error; uint8_t filetype; - ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - - sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; - size = ifp->if_bytes; - /* * Give up if the directory is way too short. */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9a88aba1589f..f16974126ff9 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1008,7 +1008,7 @@ enum xfs_dinode_fmt { * Return pointers to the data or attribute forks. */ #define XFS_DFORK_DPTR(dip) \ - ((char *)dip + xfs_dinode_size(dip->di_version)) + ((void *)dip + xfs_dinode_size(dip->di_version)) #define XFS_DFORK_APTR(dip) \ (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) #define XFS_DFORK_PTR(dip,w) \ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 99e796256c5d..6296993ff8f3 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -68,6 +68,11 @@ struct xfs_fsop_geom; #define XFS_SICK_INO_SYMLINK (1 << 6) /* symbolic link remote target */ #define XFS_SICK_INO_PARENT (1 << 7) /* parent pointers */ +#define XFS_SICK_INO_BMBTD_ZAPPED (1 << 8) /* data fork erased */ +#define XFS_SICK_INO_BMBTA_ZAPPED (1 << 9) /* attr fork erased */ +#define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */ +#define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */ + /* Primary evidence of health problems in a given group. 
*/ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ XFS_SICK_FS_UQUOTA | \ @@ -97,6 +102,11 @@ struct xfs_fsop_geom; XFS_SICK_INO_SYMLINK | \ XFS_SICK_INO_PARENT) +#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ + XFS_SICK_INO_BMBTA_ZAPPED | \ + XFS_SICK_INO_DIR_ZAPPED | \ + XFS_SICK_INO_SYMLINK_ZAPPED) + /* These functions must be provided by the xfs implementation. */ void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5a2e7ddfa76d..dad8ea832c20 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -702,12 +702,22 @@ xfs_ifork_verify_local_data( xfs_failaddr_t fa = NULL; switch (VFS_I(ip)->i_mode & S_IFMT) { - case S_IFDIR: - fa = xfs_dir2_sf_verify(ip); + case S_IFDIR: { + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + struct xfs_dir2_sf_hdr *sfp; + + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + fa = xfs_dir2_sf_verify(mp, sfp, ifp->if_bytes); break; - case S_IFLNK: - fa = xfs_symlink_shortform_verify(ip); + } + case S_IFLNK: { + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + + fa = xfs_symlink_shortform_verify(ifp->if_u1.if_data, + ifp->if_bytes); break; + } default: break; } @@ -729,11 +739,20 @@ xfs_ifork_verify_local_attr( struct xfs_ifork *ifp = &ip->i_af; xfs_failaddr_t fa; - if (!xfs_inode_has_attr_fork(ip)) + if (!xfs_inode_has_attr_fork(ip)) { fa = __this_address; - else - fa = xfs_attr_shortform_verify(ip); + } else { + struct xfs_attr_shortform *sfp; + struct xfs_ifork *ifp; + int64_t size; + ASSERT(ip->i_af.if_format == XFS_DINODE_FMT_LOCAL); + ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + fa = xfs_attr_shortform_verify(sfp, size); + } if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", ifp->if_u1.if_data, ifp->if_bytes, fa); diff --git 
a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c4381388c0c1..4220d3584c1b 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -139,7 +139,7 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); -xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size); /* Computed inode geometry for the filesystem. */ struct xfs_ino_geometry { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index bdc777b9ec4a..3c96d1d617fb 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -202,15 +202,11 @@ xfs_symlink_local_to_remote( */ xfs_failaddr_t xfs_symlink_shortform_verify( - struct xfs_inode *ip) + void *sfp, + int64_t size) { - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); - char *sfp = (char *)ifp->if_u1.if_data; - int size = ifp->if_bytes; char *endp = sfp + size; - ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - /* * Zero length symlinks should never occur in memory as they are * never allowed to exist on disk. diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 06d8c1996a33..8175e8c17c14 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -19,9 +19,11 @@ #include "xfs_bmap_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" +#include "xfs_health.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" +#include "scrub/health.h" #include "xfs_ag.h" /* Set us up with an inode's bmap. 
*/ @@ -78,6 +80,10 @@ xchk_setup_inode_bmap( if (error) goto out; + error = xchk_ino_dqattach(sc); + if (error) + goto out; + xchk_ilock(sc, XFS_ILOCK_EXCL); out: /* scrub teardown will unlock and release the inode */ @@ -633,6 +639,82 @@ xchk_bmap_check_ag_rmaps( } /* + * Decide if we want to scan the reverse mappings to determine if the attr + * fork /really/ has zero space mappings. + */ +STATIC bool +xchk_bmap_check_empty_attrfork( + struct xfs_inode *ip) +{ + struct xfs_ifork *ifp = &ip->i_af; + + /* + * If the dinode repair found a bad attr fork, it will reset the fork + * to extents format with zero records and wait for this scrubber + * to reconstruct the block mappings. If the fork is not in this + * state, then the fork cannot have been zapped. + */ + if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) + return false; + + /* + * Files can have an attr fork in EXTENTS format with zero records for + * several reasons: + * + * a) an attr set created a fork but ran out of space + * b) attr replace deleted an old attr but failed during the set step + * c) the data fork was in btree format when all attrs were deleted, so + * the fork was left in place + * d) the inode repair code zapped the fork + * + * Only in case (d) do we want to scan the rmapbt to see if we need to + * rebuild the attr fork. The fork zap code clears all DAC permission + * bits and zeroes the uid and gid, so avoid the scan if any of those + * three conditions are not met. + */ + if ((VFS_I(ip)->i_mode & 0777) != 0) + return false; + if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID)) + return false; + if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID)) + return false; + + return true; +} + +/* + * Decide if we want to scan the reverse mappings to determine if the data + * fork /really/ has zero space mappings. + */ +STATIC bool +xchk_bmap_check_empty_datafork( + struct xfs_inode *ip) +{ + struct xfs_ifork *ifp = &ip->i_df; + + /* Don't support realtime rmap checks yet. 
*/ + if (XFS_IS_REALTIME_INODE(ip)) + return false; + + /* + * If the dinode repair found a bad data fork, it will reset the fork + * to extents format with zero records and wait for this scrubber + * to reconstruct the block mappings. If the fork is not in this + * state, then the fork cannot have been zapped. + */ + if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) + return false; + + /* + * If we encounter an empty data fork along with evidence that the fork + * might not really be empty, we need to scan the reverse mappings to + * decide if we're going to rebuild the fork. Data forks with nonzero + * file size are scanned. + */ + return i_size_read(VFS_I(ip)) != 0; +} + +/* * Decide if we want to walk every rmap btree in the fs to make sure that each * rmap for this file fork has corresponding bmbt entries. */ @@ -641,7 +723,6 @@ xchk_bmap_want_check_rmaps( struct xchk_bmap_info *info) { struct xfs_scrub *sc = info->sc; - struct xfs_ifork *ifp; if (!xfs_has_rmapbt(sc->mp)) return false; @@ -650,28 +731,10 @@ xchk_bmap_want_check_rmaps( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return false; - /* Don't support realtime rmap checks yet. */ - if (info->is_rt) - return false; - - /* - * The inode repair code zaps broken inode forks by resetting them back - * to EXTENTS format and zero extent records. If we encounter a fork - * in this state along with evidence that the fork isn't supposed to be - * empty, we need to scan the reverse mappings to decide if we're going - * to rebuild the fork. Data forks with nonzero file size are scanned. - * xattr forks are never empty of content, so they are always scanned. 
- */ - ifp = xfs_ifork_ptr(sc->ip, info->whichfork); - if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) { - if (info->whichfork == XFS_DATA_FORK && - i_size_read(VFS_I(sc->ip)) == 0) - return false; - - return true; - } + if (info->whichfork == XFS_ATTR_FORK) + return xchk_bmap_check_empty_attrfork(sc->ip); - return false; + return xchk_bmap_check_empty_datafork(sc->ip); } /* Make sure each rmap has a corresponding bmbt entry. */ @@ -939,7 +1002,20 @@ int xchk_bmap_data( struct xfs_scrub *sc) { - return xchk_bmap(sc, XFS_DATA_FORK); + int error; + + if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_bmap(sc, XFS_DATA_FORK); + if (error) + return error; + + /* If the data fork is clean, it is clearly not zapped. */ + xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED); + return 0; } /* Scrub an inode's attr fork. */ @@ -947,7 +1023,27 @@ int xchk_bmap_attr( struct xfs_scrub *sc) { - return xchk_bmap(sc, XFS_ATTR_FORK); + int error; + + /* + * If the attr fork has been zapped, it's possible that forkoff was + * reset to zero and hence sc->ip->i_afp is NULL. We don't want the + * NULL ifp check in xchk_bmap to conclude that the attr fork is ok, + * so short circuit that logic by setting the corruption flag and + * returning immediately. + */ + if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_bmap(sc, XFS_ATTR_FORK); + if (error) + return error; + + /* If the attr fork is clean, it is clearly not zapped. */ + xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED); + return 0; } /* Scrub an inode's CoW fork. 
*/ diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index e0d6d8c9f640..81f2b96bb5a7 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -25,6 +25,7 @@ #include "xfs_trans_priv.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" #include "xfs_attr.h" #include "xfs_reflink.h" #include "xfs_ag.h" @@ -819,6 +820,26 @@ again: return 0; } +#ifdef CONFIG_XFS_QUOTA +/* + * Try to attach dquots to this inode if we think we might want to repair it. + * Callers must not hold any ILOCKs. If the dquots are broken and cannot be + * attached, a quotacheck will be scheduled. + */ +int +xchk_ino_dqattach( + struct xfs_scrub *sc) +{ + ASSERT(sc->tp != NULL); + ASSERT(sc->ip != NULL); + + if (!xchk_could_repair(sc)) + return 0; + + return xrep_ino_dqattach(sc); +} +#endif + /* Install an inode that we opened by handle for scrubbing. */ int xchk_install_handle_inode( @@ -1030,6 +1051,11 @@ xchk_setup_inode_contents( error = xchk_trans_alloc(sc, resblks); if (error) goto out; + + error = xchk_ino_dqattach(sc); + if (error) + goto out; + xchk_ilock(sc, XFS_ILOCK_EXCL); out: /* scrub teardown will unlock and release the inode for us */ @@ -1135,6 +1161,7 @@ xchk_metadata_inode_subtype( unsigned int scrub_type) { __u32 smtype = sc->sm->sm_type; + unsigned int sick_mask = sc->sick_mask; int error; sc->sm->sm_type = scrub_type; @@ -1152,6 +1179,7 @@ xchk_metadata_inode_subtype( break; } + sc->sick_mask = sick_mask; sc->sm->sm_type = smtype; return error; } diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index c31be570e7d8..ec5755266259 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -103,9 +103,15 @@ xchk_setup_rtsummary(struct xfs_scrub *sc) } #endif #ifdef CONFIG_XFS_QUOTA +int xchk_ino_dqattach(struct xfs_scrub *sc); int xchk_setup_quota(struct xfs_scrub *sc); #else static inline int +xchk_ino_dqattach(struct xfs_scrub *sc) +{ + return 0; +} +static inline int xchk_setup_quota(struct xfs_scrub *sc) { 
return -ENOENT; @@ -192,6 +198,8 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) XFS_SCRUB_OFLAG_XCORRUPT); } +bool xchk_dir_looks_zapped(struct xfs_inode *dp); + #ifdef CONFIG_XFS_ONLINE_REPAIR /* Decide if a repair is required. */ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 0b491784b759..d86ab51af928 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -15,10 +15,12 @@ #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" +#include "xfs_health.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/dabtree.h" #include "scrub/readdir.h" +#include "scrub/health.h" /* Set us up to scrub directories. */ int @@ -760,6 +762,11 @@ xchk_directory( if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) return -ENOENT; + if (xchk_file_looks_zapped(sc, XFS_SICK_INO_DIR_ZAPPED)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + /* Plausible size? */ if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) { xchk_ino_set_corrupt(sc, sc->ip->i_ino); @@ -784,7 +791,36 @@ xchk_directory( /* Look up every name in this directory by hash. */ error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, NULL); - if (error == -ECANCELED) - error = 0; - return error; + if (error && error != -ECANCELED) + return error; + + /* If the dir is clean, it is clearly not zapped. */ + xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_DIR_ZAPPED); + return 0; +} + +/* + * Decide if this directory has been zapped to satisfy the inode and ifork + * verifiers. Checking and repairing should be postponed until the directory + * is fixed. 
+ */ +bool +xchk_dir_looks_zapped( + struct xfs_inode *dp) +{ + /* Repair zapped this dir's data fork a short time ago */ + if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) + return true; + + /* + * If the dinode repair found a bad data fork, it will reset the fork + * to extents format with zero records and wait for the bmapbtd + * scrubber to reconstruct the block mappings. Directories always + * contain some content, so this is a clear sign of a zapped directory. + * The state checked by xfs_ifork_zapped is not persisted, so this is + * the secondary strategy if repairs are interrupted by a crash or an + * unmount. + */ + return dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS && + dp->i_df.if_nextents == 0; } diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 5e2b09ed6e29..df716da11226 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -118,6 +118,38 @@ xchk_health_mask_for_scrub_type( } /* + * If the scrub state is clean, add @mask to the scrub sick mask to clear + * additional sick flags from the metadata object's sick state. + */ +void +xchk_mark_healthy_if_clean( + struct xfs_scrub *sc, + unsigned int mask) +{ + if (!(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | + XFS_SCRUB_OFLAG_XCORRUPT))) + sc->sick_mask |= mask; +} + +/* + * If we're scrubbing a piece of file metadata for the first time, does it look + * like it has been zapped? Skip the check if we just repaired the metadata + * and are revalidating it. + */ +bool +xchk_file_looks_zapped( + struct xfs_scrub *sc, + unsigned int mask) +{ + ASSERT((mask & ~XFS_SICK_INO_ZAPPED) == 0); + + if (sc->flags & XREP_ALREADY_FIXED) + return false; + + return xfs_inode_has_sickness(sc->ip, mask); +} + +/* * Update filesystem health assessments based on what we found and did. 
* * If the scrubber finds errors, we mark sick whatever's mentioned in diff --git a/fs/xfs/scrub/health.h b/fs/xfs/scrub/health.h index 66a273f8585b..a731b2467399 100644 --- a/fs/xfs/scrub/health.h +++ b/fs/xfs/scrub/health.h @@ -10,5 +10,7 @@ unsigned int xchk_health_mask_for_scrub_type(__u32 scrub_type); void xchk_update_health(struct xfs_scrub *sc); bool xchk_ag_btree_healthy_enough(struct xfs_scrub *sc, struct xfs_perag *pag, xfs_btnum_t btnum); +void xchk_mark_healthy_if_clean(struct xfs_scrub *sc, unsigned int mask); +bool xchk_file_looks_zapped(struct xfs_scrub *sc, unsigned int mask); #endif /* __XFS_SCRUB_HEALTH_H__ */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index b7a93380a1ab..6e2fe2d6250b 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -25,6 +25,7 @@ #include "scrub/common.h" #include "scrub/btree.h" #include "scrub/trace.h" +#include "scrub/repair.h" /* Prepare the attached inode for scrubbing. */ static inline int @@ -39,6 +40,10 @@ xchk_prepare_iscrub( if (error) return error; + error = xchk_ino_dqattach(sc); + if (error) + return error; + xchk_ilock(sc, XFS_ILOCK_EXCL); return 0; } @@ -181,8 +186,11 @@ xchk_setup_inode( * saying the inode is allocated and the icache being unable to load * the inode until we can flag the corruption in xchk_inode. The * scrub function has to note the corruption, since we're not really - * supposed to do that from the setup function. + * supposed to do that from the setup function. Save the mapping to + * make repairs to the ondisk inode buffer. 
*/ + if (xchk_could_repair(sc)) + xrep_setup_inode(sc, &imap); return 0; out_cancel: @@ -338,6 +346,10 @@ xchk_inode_flags2( if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) goto bad; + /* no large extent counts without the filesystem feature */ + if ((flags2 & XFS_DIFLAG2_NREXT64) && !xfs_has_large_extent_counts(mp)) + goto bad; + return; bad: xchk_ino_set_corrupt(sc, ino); @@ -548,7 +560,7 @@ xchk_dinode( } /* di_forkoff */ - if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) + if (XFS_DFORK_BOFF(dip) >= mp->m_sb.sb_inodesize) xchk_ino_set_corrupt(sc, ino); if (naextents != 0 && dip->di_forkoff == 0) xchk_ino_set_corrupt(sc, ino); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c new file mode 100644 index 000000000000..66949cc3d7cc --- /dev/null +++ b/fs/xfs/scrub/inode_repair.c @@ -0,0 +1,1525 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2018-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <[email protected]> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_inode_buf.h" +#include "xfs_inode_fork.h" +#include "xfs_ialloc.h" +#include "xfs_da_format.h" +#include "xfs_reflink.h" +#include "xfs_alloc.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_bmap_util.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_quota_defs.h" +#include "xfs_quota.h" +#include "xfs_ag.h" +#include "xfs_rtbitmap.h" +#include "xfs_attr_leaf.h" +#include "xfs_log_priv.h" +#include "xfs_health.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/btree.h" +#include 
"scrub/trace.h" +#include "scrub/repair.h" + +/* + * Inode Record Repair + * =================== + * + * Roughly speaking, inode problems can be classified based on whether or not + * they trip the dinode verifiers. If those trip, then we won't be able to + * xfs_iget ourselves the inode. + * + * Therefore, the xrep_dinode_* functions fix anything that will cause the + * inode buffer verifier or the dinode verifier to fail. The xrep_inode_* + * functions fix things on live incore inodes. The inode repair functions make decisions + * with security and usability implications when reviving a file: + * + * - Files with zero di_mode or a garbage di_mode are converted to a regular file + * that only root can read. This file may not actually contain user data, + * if the file was not previously a regular file. Setuid and setgid bits + * are cleared. + * + * - Zero-size directories can be truncated to look empty. It is necessary to + * run the bmapbtd and directory repair functions to fully rebuild the + * directory. + * + * - Zero-size symbolic link targets can be truncated to '?'. It is necessary + * to run the bmapbtd and symlink repair functions to salvage the symlink. + * + * - Invalid extent size hints will be removed. + * + * - Quotacheck will be scheduled if we repaired an inode that was so badly + * damaged that the ondisk inode had to be rebuilt. + * + * - Invalid user, group, or project IDs (aka -1U) will be reset to zero. + * Setuid and setgid bits are cleared. + * + * - Data and attr forks are reset to extents format with zero extents if the + * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta + * repair functions to recover the space mapping. + * + * - ACLs will not be recovered if the attr fork is zapped or the extended + * attribute structure itself requires salvaging. + * + * - If the attr fork is zapped, the user and group ids are reset to root and + * the setuid and setgid bits are removed. 
+ */ + +/* + * All the information we need to repair the ondisk inode if we can't iget the + * incore inode. We don't allocate this buffer unless we're going to perform + * a repair to the ondisk inode cluster buffer. + */ +struct xrep_inode { + /* Inode mapping that we saved from the initial lookup attempt. */ + struct xfs_imap imap; + + struct xfs_scrub *sc; + + /* Blocks in use on the data device by data extents or bmbt blocks. */ + xfs_rfsblock_t data_blocks; + + /* Blocks in use on the rt device. */ + xfs_rfsblock_t rt_blocks; + + /* Blocks in use by the attr fork. */ + xfs_rfsblock_t attr_blocks; + + /* Number of data device extents for the data fork. */ + xfs_extnum_t data_extents; + + /* + * Number of realtime device extents for the data fork. If + * data_extents and rt_extents indicate that the data fork has extents + * on both devices, we'll just back away slowly. + */ + xfs_extnum_t rt_extents; + + /* Number of (data device) extents for the attr fork. */ + xfs_aextnum_t attr_extents; + + /* Sick state to set after zapping parts of the inode. */ + unsigned int ino_sick_mask; + + /* Must we remove all access from this file? */ + bool zap_acls; +}; + +/* + * Setup function for inode repair. @imap contains the ondisk inode mapping + * information so that we can correct the ondisk inode cluster buffer if + * necessary to make iget work. + */ +int +xrep_setup_inode( + struct xfs_scrub *sc, + const struct xfs_imap *imap) +{ + struct xrep_inode *ri; + + sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); + if (!sc->buf) + return -ENOMEM; + + ri = sc->buf; + memcpy(&ri->imap, imap, sizeof(struct xfs_imap)); + ri->sc = sc; + return 0; +} + +/* + * Make sure this ondisk inode can pass the inode buffer verifier. This is + * not the same as the dinode verifier. 
+ */ +STATIC void +xrep_dinode_buf_core( + struct xfs_scrub *sc, + struct xfs_buf *bp, + unsigned int ioffset) +{ + struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset); + struct xfs_trans *tp = sc->tp; + struct xfs_mount *mp = sc->mp; + xfs_agino_t agino; + bool crc_ok = false; + bool magic_ok = false; + bool unlinked_ok = false; + + agino = be32_to_cpu(dip->di_next_unlinked); + + if (xfs_verify_agino_or_null(bp->b_pag, agino)) + unlinked_ok = true; + + if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && + xfs_dinode_good_version(mp, dip->di_version)) + magic_ok = true; + + if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + crc_ok = true; + + if (magic_ok && unlinked_ok && crc_ok) + return; + + if (!magic_ok) { + dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + dip->di_version = 3; + } + if (!unlinked_ok) + dip->di_next_unlinked = cpu_to_be32(NULLAGINO); + xfs_dinode_calc_crc(mp, dip); + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); + xfs_trans_log_buf(tp, bp, ioffset, + ioffset + sizeof(struct xfs_dinode) - 1); +} + +/* Make sure this inode cluster buffer can pass the inode buffer verifier. */ +STATIC void +xrep_dinode_buf( + struct xfs_scrub *sc, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = sc->mp; + int i; + int ni; + + ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; + for (i = 0; i < ni; i++) + xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog); +} + +/* Reinitialize things that never change in an inode. */ +STATIC void +xrep_dinode_header( + struct xfs_scrub *sc, + struct xfs_dinode *dip) +{ + trace_xrep_dinode_header(sc, dip); + + dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + if (!xfs_dinode_good_version(sc->mp, dip->di_version)) + dip->di_version = 3; + dip->di_ino = cpu_to_be64(sc->sm->sm_ino); + uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid); + dip->di_gen = cpu_to_be32(sc->sm->sm_gen); +} + +/* Turn di_mode into /something/ recognizable. 
*/ +STATIC void +xrep_dinode_mode( + struct xrep_inode *ri, + struct xfs_dinode *dip) +{ + struct xfs_scrub *sc = ri->sc; + uint16_t mode = be16_to_cpu(dip->di_mode); + + trace_xrep_dinode_mode(sc, dip); + + if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) + return; + + /* bad mode, so we set it to a file that only root can read */ + mode = S_IFREG; + dip->di_mode = cpu_to_be16(mode); + dip->di_uid = 0; + dip->di_gid = 0; + ri->zap_acls = true; +} + +/* Fix any conflicting flags that the verifiers complain about. */ +STATIC void +xrep_dinode_flags( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + bool isrt) +{ + struct xfs_mount *mp = sc->mp; + uint64_t flags2 = be64_to_cpu(dip->di_flags2); + uint16_t flags = be16_to_cpu(dip->di_flags); + uint16_t mode = be16_to_cpu(dip->di_mode); + + trace_xrep_dinode_flags(sc, dip); + + if (isrt) + flags |= XFS_DIFLAG_REALTIME; + else + flags &= ~XFS_DIFLAG_REALTIME; + + /* + * For regular files on a reflink filesystem, set the REFLINK flag to + * protect shared extents. A later stage will actually check those + * extents and clear the flag if possible. + */ + if (xfs_has_reflink(mp) && S_ISREG(mode)) + flags2 |= XFS_DIFLAG2_REFLINK; + else + flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); + if (flags & XFS_DIFLAG_REALTIME) + flags2 &= ~XFS_DIFLAG2_REFLINK; + if (!xfs_has_bigtime(mp)) + flags2 &= ~XFS_DIFLAG2_BIGTIME; + if (!xfs_has_large_extent_counts(mp)) + flags2 &= ~XFS_DIFLAG2_NREXT64; + if (flags2 & XFS_DIFLAG2_NREXT64) + dip->di_nrext64_pad = 0; + else if (dip->di_version >= 3) + dip->di_v3_pad = 0; + dip->di_flags = cpu_to_be16(flags); + dip->di_flags2 = cpu_to_be64(flags2); +} + +/* + * Blow out symlink; now it points nowhere. We don't have to worry about + * incore state because this inode is failing the verifiers. 
+ */ +STATIC void +xrep_dinode_zap_symlink( + struct xrep_inode *ri, + struct xfs_dinode *dip) +{ + struct xfs_scrub *sc = ri->sc; + char *p; + + trace_xrep_dinode_zap_symlink(sc, dip); + + dip->di_format = XFS_DINODE_FMT_LOCAL; + dip->di_size = cpu_to_be64(1); + p = XFS_DFORK_PTR(dip, XFS_DATA_FORK); + *p = '?'; + ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; +} + +/* + * Blow out dir, make the parent point to the root. In the future repair will + * reconstruct this directory for us. Note that there's no in-core directory + * inode because the sf verifier tripped, so we don't have to worry about the + * dentry cache. The result is a shortform directory with zero entries whose + * size is just the shortform header, and the inode is marked as having had + * its directory zapped. + */ +STATIC void +xrep_dinode_zap_dir( + struct xrep_inode *ri, + struct xfs_dinode *dip) +{ + struct xfs_scrub *sc = ri->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_dir2_sf_hdr *sfp; + int i8count; + + trace_xrep_dinode_zap_dir(sc, dip); + + dip->di_format = XFS_DINODE_FMT_LOCAL; + i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM; + sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); + sfp->count = 0; + sfp->i8count = i8count; + xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino); + dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count)); + ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED; +} + +/* + * Make sure we don't have a garbage file size. What counts as garbage + * depends on the file type recorded in di_mode. + */ +STATIC void +xrep_dinode_size( + struct xrep_inode *ri, + struct xfs_dinode *dip) +{ + struct xfs_scrub *sc = ri->sc; + uint64_t size = be64_to_cpu(dip->di_size); + uint16_t mode = be16_to_cpu(dip->di_mode); + + trace_xrep_dinode_size(sc, dip); + + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + /* di_size can't be nonzero for special files */ + dip->di_size = 0; + break; + case S_IFREG: + /* + * Regular files can't be larger than 2^63-1 bytes, so mask off + * the top bit of the size. + */ + dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); + break; + case S_IFLNK: + /* + * Truncate ridiculously oversized symlinks. If the size is + * zero, reset it to point to the current directory.
Both of + * these conditions trigger dinode verifier errors, so there + * is no in-core state to reset. + */ + if (size > XFS_SYMLINK_MAXLEN) + dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); + else if (size == 0) + xrep_dinode_zap_symlink(ri, dip); + break; + case S_IFDIR: + /* + * Directories can't have a size larger than 32G. If the size + * is zero, reset it to an empty directory. Both of these + * conditions trigger dinode verifier errors, so there is no + * in-core state to reset. + */ + if (size > XFS_DIR2_SPACE_SIZE) + dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); + else if (size == 0) + xrep_dinode_zap_dir(ri, dip); + break; + } +} + +/* + * Fix extent size hints. If a hint fails validation, zero the hint and clear + * the ondisk flags that enable it. The CoW extent size hint is only examined + * for inodes of version 3 or higher. + */ +STATIC void +xrep_dinode_extsize_hints( + struct xfs_scrub *sc, + struct xfs_dinode *dip) +{ + struct xfs_mount *mp = sc->mp; + uint64_t flags2 = be64_to_cpu(dip->di_flags2); + uint16_t flags = be16_to_cpu(dip->di_flags); + uint16_t mode = be16_to_cpu(dip->di_mode); + + xfs_failaddr_t fa; + + trace_xrep_dinode_extsize_hints(sc, dip); + + fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), + mode, flags); + if (fa) { + dip->di_extsize = 0; + dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + } + + if (dip->di_version < 3) + return; + + fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), + mode, flags, flags2); + if (fa) { + dip->di_cowextsize = 0; + dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE); + } +} + +/* + * Count extents and blocks for an inode given an rmap. Records owned by other + * inodes are ignored. Every matching record adds to the block count of its + * fork, but bmbt block records are not counted as extents. + */ +STATIC int +xrep_dinode_walk_rmap( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *priv) +{ + struct xrep_inode *ri = priv; + int error = 0; + + if (xchk_should_terminate(ri->sc, &error)) + return error; + + /* We only care about this inode.
*/ + if (rec->rm_owner != ri->sc->sm->sm_ino) + return 0; + + if (rec->rm_flags & XFS_RMAP_ATTR_FORK) { + ri->attr_blocks += rec->rm_blockcount; + if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) + ri->attr_extents++; + + return 0; + } + + ri->data_blocks += rec->rm_blockcount; + if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) + ri->data_extents++; + + return 0; +} + +/* + * Count extents and blocks for an inode from the rmap data of a single AG. + * The AGF buffer is read for the cursor and released again before returning. + */ +STATIC int +xrep_dinode_count_ag_rmaps( + struct xrep_inode *ri, + struct xfs_perag *pag) +{ + struct xfs_btree_cur *cur; + struct xfs_buf *agf; + int error; + + error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); + if (error) + return error; + + cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag); + error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri); + xfs_btree_del_cursor(cur, error); + xfs_trans_brelse(ri->sc->tp, agf); + return error; +} + +/* + * Count extents and blocks for a given inode from all rmap data. Returns + * -EOPNOTSUPP if the filesystem has no rmapbt or has a realtime volume, and + * -EFSCORRUPTED if extents were found on both the rt and the data device. + */ +STATIC int +xrep_dinode_count_rmaps( + struct xrep_inode *ri) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno; + int error; + + if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp)) + return -EOPNOTSUPP; + + for_each_perag(ri->sc->mp, agno, pag) { + error = xrep_dinode_count_ag_rmaps(ri, pag); + if (error) { + xfs_perag_rele(pag); + return error; + } + } + + /* Can't have extents on both the rt and the data device. */ + if (ri->data_extents && ri->rt_extents) + return -EFSCORRUPTED; + + trace_xrep_dinode_count_rmaps(ri->sc, + ri->data_blocks, ri->rt_blocks, ri->attr_blocks, + ri->data_extents, ri->rt_extents, ri->attr_extents); + return 0; +} + +/* Return true if this extents-format ifork looks like garbage.
*/ +STATIC bool +xrep_dinode_bad_extents_fork( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + unsigned int dfork_size, + int whichfork) +{ + struct xfs_bmbt_irec new; + struct xfs_bmbt_rec *dp; + xfs_extnum_t nex; + bool isrt; + unsigned int i; + + nex = xfs_dfork_nextents(dip, whichfork); + if (nex > dfork_size / sizeof(struct xfs_bmbt_rec)) + return true; + + dp = XFS_DFORK_PTR(dip, whichfork); + + isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); + for (i = 0; i < nex; i++, dp++) { + xfs_failaddr_t fa; + + xfs_bmbt_disk_get_all(dp, &new); + fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork, + &new); + if (fa) + return true; + } + + return false; +} + +/* + * Return true if this btree-format ifork looks like garbage. The fork is + * considered bad if the extent count is small enough that the mappings would + * fit in the fork as an extent list, if the fork area cannot hold the bmdr + * root header and the records it claims to have, if the root has no records + * or an out of range level, or if any key offset or child pointer in the root + * fails validation. + */ +STATIC bool +xrep_dinode_bad_bmbt_fork( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + unsigned int dfork_size, + int whichfork) +{ + struct xfs_bmdr_block *dfp; + xfs_extnum_t nex; + unsigned int i; + unsigned int dmxr; + unsigned int nrecs; + unsigned int level; + + nex = xfs_dfork_nextents(dip, whichfork); + if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec)) + return true; + + if (dfork_size < sizeof(struct xfs_bmdr_block)) + return true; + + dfp = XFS_DFORK_PTR(dip, whichfork); + nrecs = be16_to_cpu(dfp->bb_numrecs); + level = be16_to_cpu(dfp->bb_level); + + if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size) + return true; + if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork)) + return true; + + dmxr = xfs_bmdr_maxrecs(dfork_size, 0); + for (i = 1; i <= nrecs; i++) { + struct xfs_bmbt_key *fkp; + xfs_bmbt_ptr_t *fpp; + xfs_fileoff_t fileoff; + xfs_fsblock_t fsbno; + + fkp = XFS_BMDR_KEY_ADDR(dfp, i); + fileoff = be64_to_cpu(fkp->br_startoff); + if (!xfs_verify_fileoff(sc->mp, fileoff)) + return true; + + fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr); + fsbno = be64_to_cpu(*fpp); + if (!xfs_verify_fsbno(sc->mp, fsbno)) + return true; + } + + return false; +} + +/* + * Check the data fork for things that will
fail the ifork verifiers or the + * ifork formatters. + */ +STATIC bool +xrep_dinode_check_dfork( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + uint16_t mode) +{ + void *dfork_ptr; + int64_t data_size; + unsigned int fmt; + unsigned int dfork_size; + + /* + * Verifier functions take signed int64_t, so check for bogus negative + * values first. + */ + data_size = be64_to_cpu(dip->di_size); + if (data_size < 0) + return true; + + fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (fmt != XFS_DINODE_FMT_DEV) + return true; + break; + case S_IFREG: + if (fmt == XFS_DINODE_FMT_LOCAL) + return true; + fallthrough; + case S_IFLNK: + case S_IFDIR: + switch (fmt) { + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return true; + } + break; + default: + return true; + } + + dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK); + dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); + + switch (fmt) { + case XFS_DINODE_FMT_DEV: + break; + case XFS_DINODE_FMT_LOCAL: + /* + * dir/symlink structure cannot be larger than the fork. Only + * directories and symlinks get here because local format + * regular files were already rejected above. + */ + if (data_size > dfork_size) + return true; + /* directory structure must pass verification. */ + if (S_ISDIR(mode) && + xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL) + return true; + /* symlink structure must pass verification.
*/ + if (S_ISLNK(mode) && + xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL) + return true; + break; + case XFS_DINODE_FMT_EXTENTS: + if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size, + XFS_DATA_FORK)) + return true; + break; + case XFS_DINODE_FMT_BTREE: + if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size, + XFS_DATA_FORK)) + return true; + break; + default: + return true; + } + + return false; +} + +static void +xrep_dinode_set_data_nextents( + struct xfs_dinode *dip, + xfs_extnum_t nextents) +{ + if (xfs_dinode_has_large_extent_counts(dip)) + dip->di_big_nextents = cpu_to_be64(nextents); + else + dip->di_nextents = cpu_to_be32(nextents); +} + +static void +xrep_dinode_set_attr_nextents( + struct xfs_dinode *dip, + xfs_extnum_t nextents) +{ + if (xfs_dinode_has_large_extent_counts(dip)) + dip->di_big_anextents = cpu_to_be32(nextents); + else + dip->di_anextents = cpu_to_be16(nextents); +} + +/* + * Reset the data fork to something sane. The ondisk extent count and the + * block counts gathered for the data fork are zeroed, the inode is marked as + * having had its data fork zapped, and a new fork format is chosen from the + * file type in @mode. + */ +STATIC void +xrep_dinode_zap_dfork( + struct xrep_inode *ri, + struct xfs_dinode *dip, + uint16_t mode) +{ + struct xfs_scrub *sc = ri->sc; + + trace_xrep_dinode_zap_dfork(sc, dip); + + ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED; + + xrep_dinode_set_data_nextents(dip, 0); + ri->data_blocks = 0; + ri->rt_blocks = 0; + + /* Special files always get reset to DEV */ + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + dip->di_format = XFS_DINODE_FMT_DEV; + dip->di_size = 0; + return; + } + + /* + * If we have data extents, reset to an empty map and hope the user + * will run the bmapbtd checker next. Regular files always get an + * empty extent map, even if no data extents were found. + */ + if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) { + dip->di_format = XFS_DINODE_FMT_EXTENTS; + return; + } + + /* Otherwise, reset the local format to the minimum.
*/ + switch (mode & S_IFMT) { + case S_IFLNK: + xrep_dinode_zap_symlink(ri, dip); + break; + case S_IFDIR: + xrep_dinode_zap_dir(ri, dip); + break; + } +} + +/* + * Check the attr fork for things that will fail the ifork verifiers or the + * ifork formatters. Returns true if the attr fork looks bad. If the inode + * has no attr fork area at all (fork offset of zero), the only acceptable + * state is extents format with zero attr extents. + */ +STATIC bool +xrep_dinode_check_afork( + struct xfs_scrub *sc, + struct xfs_dinode *dip) +{ + struct xfs_attr_shortform *afork_ptr; + size_t attr_size; + unsigned int afork_size; + + if (XFS_DFORK_BOFF(dip) == 0) + return dip->di_aformat != XFS_DINODE_FMT_EXTENTS || + xfs_dfork_attr_extents(dip) != 0; + + afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); + afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); + + switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { + case XFS_DINODE_FMT_LOCAL: + /* Fork has to be large enough to extract the xattr size. */ + if (afork_size < sizeof(struct xfs_attr_sf_hdr)) + return true; + + /* + * xattr structure cannot be larger than the fork; the size + * comes from the totsize field of the shortform header. + */ + attr_size = be16_to_cpu(afork_ptr->hdr.totsize); + if (attr_size > afork_size) + return true; + + /* xattr structure must pass verification. */ + return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; + case XFS_DINODE_FMT_EXTENTS: + if (xrep_dinode_bad_extents_fork(sc, dip, afork_size, + XFS_ATTR_FORK)) + return true; + break; + case XFS_DINODE_FMT_BTREE: + if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size, + XFS_ATTR_FORK)) + return true; + break; + default: + return true; + } + + return false; +} + +/* + * Reset the attr fork to empty. Since the attr fork could have contained + * ACLs, make the file readable only by root.
+ */ +STATIC void +xrep_dinode_zap_afork( + struct xrep_inode *ri, + struct xfs_dinode *dip, + uint16_t mode) +{ + struct xfs_scrub *sc = ri->sc; + + trace_xrep_dinode_zap_afork(sc, dip); + + ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED; + + dip->di_aformat = XFS_DINODE_FMT_EXTENTS; + xrep_dinode_set_attr_nextents(dip, 0); + ri->attr_blocks = 0; + + /* + * If the data fork is in btree format, removing the attr fork entirely + * might cause verifier failures if the next level down in the bmbt + * could now fit in the data fork area. In that case the fork offset + * is left alone. + */ + if (dip->di_format != XFS_DINODE_FMT_BTREE) + dip->di_forkoff = 0; + dip->di_mode = cpu_to_be16(mode & ~0777); + dip->di_uid = 0; + dip->di_gid = 0; +} + +/* Make sure the fork offset is a sensible value. */ +STATIC void +xrep_dinode_ensure_forkoff( + struct xrep_inode *ri, + struct xfs_dinode *dip, + uint16_t mode) +{ + struct xfs_bmdr_block *bmdr; + struct xfs_scrub *sc = ri->sc; + xfs_extnum_t attr_extents, data_extents; + size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1); + unsigned int lit_sz = XFS_LITINO(sc->mp); + unsigned int afork_min, dfork_min; + + trace_xrep_dinode_ensure_forkoff(sc, dip); + + /* + * Before calling this function, xrep_dinode_core ensured that both + * forks actually fit inside their respective literal areas. If this + * was not the case, the fork was reset to FMT_EXTENTS with zero + * records. If the rmapbt scan found attr or data fork blocks, this + * will be noted in the extent counters of struct xrep_inode, and we + * must leave enough room for the bmap repair code to reconstruct the + * mapping structure. + * + * First, compute the minimum space required for the attr fork. + */ + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + /* + * If we still have a shortform xattr structure at all, that + * means the attr fork area was exactly large enough to fit + * the sf structure.
+ */ + afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_EXTENTS: + attr_extents = xfs_dfork_attr_extents(dip); + if (attr_extents) { + /* + * We must maintain sufficient space to hold the entire + * extent map array in the attr fork. Note that we + * previously zapped the fork if it had no chance of + * fitting in the inode. + */ + afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents; + } else if (ri->attr_extents > 0) { + /* + * The attr fork thinks it has zero extents, but we + * found some xattr extents. We need to leave enough + * empty space here so that the incore attr fork will + * get created (and hence trigger the attr fork bmap + * repairer). + */ + afork_min = bmdr_minsz; + } else { + /* No extents on disk or found in rmapbt. */ + afork_min = 0; + } + break; + case XFS_DINODE_FMT_BTREE: + /* Must have space for btree header and key/pointers. */ + bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); + afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + break; + default: + /* We should never see any other formats. */ + afork_min = 0; + break; + } + + /* Compute the minimum space required for the data fork. */ + switch (dip->di_format) { + case XFS_DINODE_FMT_DEV: + dfork_min = sizeof(__be32); + break; + case XFS_DINODE_FMT_UUID: + dfork_min = sizeof(uuid_t); + break; + case XFS_DINODE_FMT_LOCAL: + /* + * If we still have a shortform data fork at all, that means + * the data fork area was large enough to fit whatever was in + * there. + */ + dfork_min = be64_to_cpu(dip->di_size); + break; + case XFS_DINODE_FMT_EXTENTS: + data_extents = xfs_dfork_data_extents(dip); + if (data_extents) { + /* + * We must maintain sufficient space to hold the entire + * extent map array in the data fork. Note that we + * previously zapped the fork if it had no chance of + * fitting in the inode.
+ */ + dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents; + } else if (ri->data_extents > 0 || ri->rt_extents > 0) { + /* + * The data fork thinks it has zero extents, but we + * found some data extents. We need to leave enough + * empty space here so that the data fork bmap repair + * will recover the mappings. + */ + dfork_min = bmdr_minsz; + } else { + /* No extents on disk or found in rmapbt. */ + dfork_min = 0; + } + break; + case XFS_DINODE_FMT_BTREE: + /* Must have space for btree header and key/pointers. */ + bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); + dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + break; + default: + dfork_min = 0; + break; + } + + /* + * Round all values up to the nearest 8 bytes, because that is the + * precision of di_forkoff. + */ + afork_min = roundup(afork_min, 8); + dfork_min = roundup(dfork_min, 8); + bmdr_minsz = roundup(bmdr_minsz, 8); + + ASSERT(dfork_min <= lit_sz); + ASSERT(afork_min <= lit_sz); + + /* + * If the data fork was zapped and we don't have enough space for the + * recovery fork, move the attr fork up. + */ + if (dip->di_format == XFS_DINODE_FMT_EXTENTS && + xfs_dfork_data_extents(dip) == 0 && + (ri->data_extents > 0 || ri->rt_extents > 0) && + bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) { + if (bmdr_minsz + afork_min > lit_sz) { + /* + * The attr fork and the stub fork we need to recover + * the data fork won't both fit. Zap the attr fork. + */ + xrep_dinode_zap_afork(ri, dip, mode); + afork_min = bmdr_minsz; + } else { + void *before, *after; + + /* + * Otherwise, just slide the attr fork up. The new + * fork offset is stored in units of 8 bytes, and the + * attr fork contents are moved to the new location. + */ + before = XFS_DFORK_APTR(dip); + dip->di_forkoff = bmdr_minsz >> 3; + after = XFS_DFORK_APTR(dip); + memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp)); + } + } + + /* + * If the attr fork was zapped and we don't have enough space for the + * recovery fork, move the attr fork down.
+ */ + if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS && + xfs_dfork_attr_extents(dip) == 0 && + ri->attr_extents > 0 && + bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) { + if (dip->di_format == XFS_DINODE_FMT_BTREE) { + /* + * If the data fork is in btree format then we can't + * adjust forkoff because that runs the risk of + * violating the extents/btree format transition rules. + */ + } else if (bmdr_minsz + dfork_min > lit_sz) { + /* + * If we can't move the attr fork, too bad, we lose the + * attr fork and leak its blocks. + */ + xrep_dinode_zap_afork(ri, dip, mode); + } else { + /* + * Otherwise, just slide the attr fork down. The attr + * fork is empty, so we don't have any old contents to + * move here. + */ + dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3; + } + } +} + +/* + * Zap the data/attr forks if we spot anything that isn't going to pass the + * ifork verifiers or the ifork formatters, because we need to get the inode + * into good enough shape that the higher level repair functions can run. + * The attr fork is always zapped if an earlier step asked for ACLs to be + * zapped. + */ +STATIC void +xrep_dinode_zap_forks( + struct xrep_inode *ri, + struct xfs_dinode *dip) +{ + struct xfs_scrub *sc = ri->sc; + xfs_extnum_t data_extents; + xfs_extnum_t attr_extents; + xfs_filblks_t nblocks; + uint16_t mode; + bool zap_datafork = false; + bool zap_attrfork = ri->zap_acls; + + trace_xrep_dinode_zap_forks(sc, dip); + + mode = be16_to_cpu(dip->di_mode); + + data_extents = xfs_dfork_data_extents(dip); + attr_extents = xfs_dfork_attr_extents(dip); + nblocks = be64_to_cpu(dip->di_nblocks); + + /* + * Inode counters don't make sense? A fork that claims more extents + * than the inode has blocks gets zapped, and both forks get zapped if + * only their combined extent count exceeds the block count. + */ + if (data_extents > nblocks) + zap_datafork = true; + if (attr_extents > nblocks) + zap_attrfork = true; + if (data_extents + attr_extents > nblocks) + zap_datafork = zap_attrfork = true; + + if (!zap_datafork) + zap_datafork = xrep_dinode_check_dfork(sc, dip, mode); + if (!zap_attrfork) + zap_attrfork = xrep_dinode_check_afork(sc, dip); + + /* Zap whatever's bad.
*/ + if (zap_attrfork) + xrep_dinode_zap_afork(ri, dip, mode); + if (zap_datafork) + xrep_dinode_zap_dfork(ri, dip, mode); + xrep_dinode_ensure_forkoff(ri, dip, mode); + + /* + * Zero di_nblocks if we don't have any extents at all to satisfy the + * buffer verifier. The extent counts are reread because zapping a + * fork resets its count. + */ + data_extents = xfs_dfork_data_extents(dip); + attr_extents = xfs_dfork_attr_extents(dip); + if (data_extents + attr_extents == 0) + dip->di_nblocks = 0; +} + +/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */ +STATIC int +xrep_dinode_core( + struct xrep_inode *ri) +{ + struct xfs_scrub *sc = ri->sc; + struct xfs_buf *bp; + struct xfs_dinode *dip; + xfs_ino_t ino = sc->sm->sm_ino; + int error; + int iget_error; + + /* Figure out what this inode had mapped in both forks. */ + error = xrep_dinode_count_rmaps(ri); + if (error) + return error; + + /* + * Read the inode cluster buffer. No buffer ops are passed to the read + * here; they are attached below once the buffer has been fixed. + */ + error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp, + ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp, + NULL); + if (error) + return error; + + /* Make sure we can pass the inode buffer verifier. */ + xrep_dinode_buf(sc, bp); + bp->b_ops = &xfs_inode_buf_ops; + + /* Fix everything the verifier will complain about. */ + dip = xfs_buf_offset(bp, ri->imap.im_boffset); + xrep_dinode_header(sc, dip); + xrep_dinode_mode(ri, dip); + xrep_dinode_flags(sc, dip, ri->rt_extents > 0); + xrep_dinode_size(ri, dip); + xrep_dinode_extsize_hints(sc, dip); + xrep_dinode_zap_forks(ri, dip); + + /* + * Write out the inode: recompute the checksum and log the entire + * ondisk inode (sb_inodesize bytes) to the transaction. + */ + trace_xrep_dinode_fixed(sc, dip); + xfs_dinode_calc_crc(sc->mp, dip); + xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF); + xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset, + ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1); + + /* + * In theory, we've fixed the ondisk inode record enough that we should + * be able to load the inode into the cache.
Try to iget that inode + * now while we hold the AGI and the inode cluster buffer and take the + * IOLOCK so that we can continue with repairs without anyone else + * accessing the inode. If iget fails, we still need to commit the + * changes. + */ + iget_error = xchk_iget(sc, ino, &sc->ip); + if (!iget_error) + xchk_ilock(sc, XFS_IOLOCK_EXCL); + + /* + * Commit the inode cluster buffer updates and drop the AGI buffer that + * we've been holding since scrub setup. From here on out, repairs + * deal only with the cached inode. A commit error takes precedence + * over an iget error. + */ + error = xrep_trans_commit(sc); + if (error) + return error; + + if (iget_error) + return iget_error; + + error = xchk_trans_alloc(sc, 0); + if (error) + return error; + + error = xrep_ino_dqattach(sc); + if (error) + return error; + + xchk_ilock(sc, XFS_ILOCK_EXCL); + if (ri->ino_sick_mask) + xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask); + return 0; +} + +/* + * Fix everything xfs_dinode_verify cares about, then schedule a quotacheck + * for every quota type that is enabled. + */ +STATIC int +xrep_dinode_problems( + struct xrep_inode *ri) +{ + struct xfs_scrub *sc = ri->sc; + int error; + + error = xrep_dinode_core(ri); + if (error) + return error; + + /* We had to fix a totally busted inode, schedule quotacheck. */ + if (XFS_IS_UQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_USER); + if (XFS_IS_GQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); + if (XFS_IS_PQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); + + return 0; +} + +/* + * Fix problems that the verifiers don't care about. In general these are + * errors that don't cause problems elsewhere in the kernel that we can easily + * detect, so we don't check them all that rigorously. + */ + +/* + * Make sure block and extent counts are ok by recounting the mappings in each + * fork and resetting the incore extent counts and i_nblocks to match. + */ +STATIC int +xrep_inode_blockcounts( + struct xfs_scrub *sc) +{ + struct xfs_ifork *ifp; + xfs_filblks_t count; + xfs_filblks_t acount; + xfs_extnum_t nextents; + int error; + + trace_xrep_inode_blockcounts(sc); + + /* Set data fork counters from the data fork mappings.
*/ + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, + &nextents, &count); + if (error) + return error; + if (xfs_is_reflink_inode(sc->ip)) { + /* + * data fork blockcount can exceed physical storage if a user + * reflinks the same block over and over again. + */ + ; + } else if (XFS_IS_REALTIME_INODE(sc->ip)) { + if (count >= sc->mp->m_sb.sb_rblocks) + return -EFSCORRUPTED; + } else { + if (count >= sc->mp->m_sb.sb_dblocks) + return -EFSCORRUPTED; + } + error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); + if (error) + return error; + sc->ip->i_df.if_nextents = nextents; + + /* Set attr fork counters from the attr fork mappings. */ + ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); + if (ifp) { + error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, + &nextents, &acount); + if (error) + return error; + /* + * Check the attr fork's own block count against the size of + * the data device. The data fork count must not be tested + * here: it was already checked above, and for reflink and + * realtime files it is allowed to exceed sb_dblocks. + */ + if (acount >= sc->mp->m_sb.sb_dblocks) + return -EFSCORRUPTED; + error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK, + nextents); + if (error) + return error; + ifp->if_nextents = nextents; + } else { + acount = 0; + } + + sc->ip->i_nblocks = count + acount; + return 0; +} + +/* Check for invalid uid/gid/prid.
*/ +STATIC void +xrep_inode_ids( + struct xfs_scrub *sc) +{ + bool dirty = false; + + trace_xrep_inode_ids(sc); + + if (!uid_valid(VFS_I(sc->ip)->i_uid)) { + i_uid_write(VFS_I(sc->ip), 0); + dirty = true; + if (XFS_IS_UQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_USER); + } + + if (!gid_valid(VFS_I(sc->ip)->i_gid)) { + i_gid_write(VFS_I(sc->ip), 0); + dirty = true; + if (XFS_IS_GQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); + } + + if (sc->ip->i_projid == -1U) { + sc->ip->i_projid = 0; + dirty = true; + if (XFS_IS_PQUOTA_ON(sc->mp)) + xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); + } + + /* strip setuid/setgid if we touched any of the ids */ + if (dirty) + VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID); +} + +/* + * Clamp the nanoseconds field of a timestamp into the valid range, which is + * 0 to NSEC_PER_SEC - 1 inclusive, then truncate the timestamp to whatever + * the inode's filesystem can store. + */ +static inline void +xrep_clamp_timestamp( + struct xfs_inode *ip, + struct timespec64 *ts) +{ + ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC - 1); + *ts = timestamp_truncate(*ts, VFS_I(ip)); +} + +/* Nanosecond counters must be less than 1 billion. */ +STATIC void +xrep_inode_timestamps( + struct xfs_inode *ip) +{ + struct timespec64 tstamp; + struct inode *inode = VFS_I(ip); + + tstamp = inode_get_atime(inode); + xrep_clamp_timestamp(ip, &tstamp); + inode_set_atime_to_ts(inode, tstamp); + + tstamp = inode_get_mtime(inode); + xrep_clamp_timestamp(ip, &tstamp); + inode_set_mtime_to_ts(inode, tstamp); + + tstamp = inode_get_ctime(inode); + xrep_clamp_timestamp(ip, &tstamp); + inode_set_ctime_to_ts(inode, tstamp); + + xrep_clamp_timestamp(ip, &ip->i_crtime); +} + +/* Fix inode flags that don't make sense together.
*/ +STATIC void +xrep_inode_flags( + struct xfs_scrub *sc) +{ + uint16_t mode; + + trace_xrep_inode_flags(sc); + + mode = VFS_I(sc->ip)->i_mode; + + /* + * Clear junk flags. Keep only the bits that are defined in + * XFS_DIFLAG_ANY; masking with the complement would throw away every + * valid flag and keep the junk. + */ + if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY) + sc->ip->i_diflags &= XFS_DIFLAG_ANY; + + /* NEWRTBM only applies to realtime bitmaps */ + if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino) + sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; + else + sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM; + + /* These only make sense for directories. */ + if (!S_ISDIR(mode)) + sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT | + XFS_DIFLAG_EXTSZINHERIT | + XFS_DIFLAG_PROJINHERIT | + XFS_DIFLAG_NOSYMLINKS); + + /* These only make sense for files. */ + if (!S_ISREG(mode)) + sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME | + XFS_DIFLAG_EXTSIZE); + + /* These only make sense for non-rt files. */ + if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) + sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM; + + /* Immutable and append only? Drop the append. */ + if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) && + (sc->ip->i_diflags & XFS_DIFLAG_APPEND)) + sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND; + + /* Clear junk flags, keeping only the bits in XFS_DIFLAG2_ANY. */ + if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY) + sc->ip->i_diflags2 &= XFS_DIFLAG2_ANY; + + /* No reflink flag unless we support it and it's a file. */ + if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode)) + sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + + /* DAX only applies to files and dirs. */ + if (!(S_ISREG(mode) || S_ISDIR(mode))) + sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; + + /* No reflink files on the realtime device. */ + if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) + sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; +} + +/* + * Fix size problems with block/node format directories. If we fail to find + * the extent list, just bail out and let the bmapbtd repair functions clean + * up that mess.
+ */ +STATIC void +xrep_inode_blockdir_size( + struct xfs_scrub *sc) +{ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got; + struct xfs_ifork *ifp; + xfs_fileoff_t off; + int error; + + trace_xrep_inode_blockdir_size(sc); + + error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); + if (error) + return; + + /* Find the last block before 32G; this is the dir size. */ + ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); + off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); + if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { + /* zero-extents directory? */ + return; + } + + off = got.br_startoff + got.br_blockcount; + sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE, + XFS_FSB_TO_B(sc->mp, off)); +} + +/* + * Fix size problems with short format directories. The size is set to the + * number of bytes in the incore data fork. + */ +STATIC void +xrep_inode_sfdir_size( + struct xfs_scrub *sc) +{ + struct xfs_ifork *ifp; + + trace_xrep_inode_sfdir_size(sc); + + ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); + sc->ip->i_disk_size = ifp->if_bytes; +} + +/* + * Fix any irregularities in a directory inode's size now that we can iterate + * extent maps and access other regular inode data. Data forks in any other + * format than extents, btree, or local are left alone. + */ +STATIC void +xrep_inode_dir_size( + struct xfs_scrub *sc) +{ + trace_xrep_inode_dir_size(sc); + + switch (sc->ip->i_df.if_format) { + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + xrep_inode_blockdir_size(sc); + break; + case XFS_DINODE_FMT_LOCAL: + xrep_inode_sfdir_size(sc); + break; + } +} + +/* Fix extent size hint problems. */ +STATIC void +xrep_inode_extsize( + struct xfs_scrub *sc) +{ + /* + * Fix misaligned extent size hints on a directory. If the directory + * passes both the realtime flag and an extent size hint on to its + * children and xfs_extlen_to_rtxmod reports a nonzero remainder for + * the hint, drop the hint and the flag that propagates it. + */ + if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && + (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && + xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) { + sc->ip->i_extsize = 0; + sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT; + } +} + +/* Fix any irregularities in an inode that the verifiers don't catch.
*/ +STATIC int +xrep_inode_problems( + struct xfs_scrub *sc) +{ + int error; + + error = xrep_inode_blockcounts(sc); + if (error) + return error; + xrep_inode_timestamps(sc->ip); + xrep_inode_flags(sc); + xrep_inode_ids(sc); + /* + * Now that we can scan the data fork extents, we can do a better job + * of fixing the size of a directory than we could in + * xrep_dinode_size. + */ + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) + xrep_inode_dir_size(sc); + xrep_inode_extsize(sc); + + trace_xrep_inode_fixed(sc); + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + return xrep_roll_trans(sc); +} + +/* + * Repair an inode's fields. If there is no incore inode, fix the ondisk + * inode first so that it can be loaded. Then fix the incore inode if scrub + * flagged any corruption, and finally try to clear the reflink flag. + */ +int +xrep_inode( + struct xfs_scrub *sc) +{ + int error = 0; + + /* + * No inode? That means we failed the _iget verifiers. Repair all + * the things that the inode verifiers care about, then retry _iget. + */ + if (!sc->ip) { + struct xrep_inode *ri = sc->buf; + + ASSERT(ri != NULL); + + error = xrep_dinode_problems(ri); + if (error) + return error; + + /* By this point we had better have a working incore inode. */ + if (!sc->ip) + return -EFSCORRUPTED; + } + + xfs_trans_ijoin(sc->tp, sc->ip, 0); + + /* If we found corruption of any kind, try to fix it. */ + if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) { + error = xrep_inode_problems(sc); + if (error) + return error; + } + + /* See if we can clear the reflink flag. */ + if (xfs_is_reflink_inode(sc->ip)) { + error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); + if (error) + return error; + } + + return xrep_defer_finish(sc); +} diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index e6155d86f791..7db873672146 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -156,6 +156,16 @@ xchk_parent_validate( goto out_rele; } + /* + * We cannot yet validate this parent pointer if the directory looks as + * though it has been zapped by the inode record repair code.
+ */ + if (xchk_dir_looks_zapped(dp)) { + error = -EBUSY; + xchk_set_incomplete(sc); + goto out_unlock; + } + /* Look for a directory entry in the parent pointing to the child. */ error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc); if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) @@ -217,6 +227,13 @@ xchk_parent( */ error = xchk_parent_validate(sc, parent_ino); } while (error == -EAGAIN); + if (error == -EBUSY) { + /* + * We could not scan a directory, so we marked the check + * incomplete. No further error return is necessary. + */ + return 0; + } return error; } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index a604f0cea8c1..25392dea326d 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -176,6 +176,16 @@ xrep_roll_ag_trans( return 0; } +/* Roll the scrub transaction, holding the primary metadata locked. */ +int +xrep_roll_trans( + struct xfs_scrub *sc) +{ + if (!sc->ip) + return xrep_roll_ag_trans(sc); + return xfs_trans_roll_inode(&sc->tp, sc->ip); +} + /* Finish all deferred work attached to the repair transaction. */ int xrep_defer_finish( @@ -673,6 +683,7 @@ xrep_find_ag_btree_roots( return error; } +#ifdef CONFIG_XFS_QUOTA /* Force a quotacheck the next time we mount. */ void xrep_force_quotacheck( @@ -699,10 +710,10 @@ xrep_force_quotacheck( * * This function ensures that the appropriate dquots are attached to an inode. * We cannot allow the dquot code to allocate an on-disk dquot block here - * because we're already in transaction context with the inode locked. The - * on-disk dquot should already exist anyway. If the quota code signals - * corruption or missing quota information, schedule quotacheck, which will - * repair corruptions in the quota metadata. + * because we're already in transaction context. The on-disk dquot should + * already exist anyway. If the quota code signals corruption or missing quota + * information, schedule quotacheck, which will repair corruptions in the quota + * metadata. 
*/ int xrep_ino_dqattach( @@ -710,7 +721,10 @@ xrep_ino_dqattach( { int error; - error = xfs_qm_dqattach_locked(sc->ip, false); + ASSERT(sc->tp != NULL); + ASSERT(sc->ip != NULL); + + error = xfs_qm_dqattach(sc->ip); switch (error) { case -EFSBADCRC: case -EFSCORRUPTED: @@ -734,6 +748,39 @@ xrep_ino_dqattach( return error; } +#endif /* CONFIG_XFS_QUOTA */ + +/* + * Ensure that the inode being repaired is ready to handle a certain number of + * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode + * being repaired and have joined it to the scrub transaction. + */ +int +xrep_ino_ensure_extent_count( + struct xfs_scrub *sc, + int whichfork, + xfs_extnum_t nextents) +{ + xfs_extnum_t max_extents; + bool inode_has_nrext64; + + inode_has_nrext64 = xfs_inode_has_large_extent_counts(sc->ip); + max_extents = xfs_iext_max_nextents(inode_has_nrext64, whichfork); + if (nextents <= max_extents) + return 0; + if (inode_has_nrext64) + return -EFSCORRUPTED; + if (!xfs_has_large_extent_counts(sc->mp)) + return -EFSCORRUPTED; + + max_extents = xfs_iext_max_nextents(true, whichfork); + if (nextents > max_extents) + return -EFSCORRUPTED; + + sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64; + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + return 0; +} /* * Initialize all the btree cursors for an AG repair except for the btree that diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index cc7ea3942729..a513b84f5330 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -30,11 +30,22 @@ static inline int xrep_notsupported(struct xfs_scrub *sc) int xrep_attempt(struct xfs_scrub *sc, struct xchk_stats_run *run); void xrep_failure(struct xfs_mount *mp); int xrep_roll_ag_trans(struct xfs_scrub *sc); +int xrep_roll_trans(struct xfs_scrub *sc); int xrep_defer_finish(struct xfs_scrub *sc); bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks, enum xfs_ag_resv_type type); xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc); +static 
inline int +xrep_trans_commit( + struct xfs_scrub *sc) +{ + int error = xfs_trans_commit(sc->tp); + + sc->tp = NULL; + return error; +} + struct xbitmap; struct xagb_bitmap; @@ -57,13 +68,25 @@ struct xrep_find_ag_btree { int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp, struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp); + +#ifdef CONFIG_XFS_QUOTA void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type); int xrep_ino_dqattach(struct xfs_scrub *sc); +#else +# define xrep_force_quotacheck(sc, type) ((void)0) +# define xrep_ino_dqattach(sc) (0) +#endif /* CONFIG_XFS_QUOTA */ + +int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork, + xfs_extnum_t nextents); int xrep_reset_perag_resv(struct xfs_scrub *sc); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); +struct xfs_imap; +int xrep_setup_inode(struct xfs_scrub *sc, const struct xfs_imap *imap); + void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa); /* Metadata revalidators */ @@ -81,12 +104,15 @@ int xrep_agi(struct xfs_scrub *sc); int xrep_allocbt(struct xfs_scrub *sc); int xrep_iallocbt(struct xfs_scrub *sc); int xrep_refcountbt(struct xfs_scrub *sc); +int xrep_inode(struct xfs_scrub *sc); int xrep_reinit_pagf(struct xfs_scrub *sc); int xrep_reinit_pagi(struct xfs_scrub *sc); #else +#define xrep_ino_dqattach(sc) (0) + static inline int xrep_attempt( struct xfs_scrub *sc, @@ -124,6 +150,8 @@ xrep_setup_nothing( } #define xrep_setup_ag_allocbt xrep_setup_nothing +#define xrep_setup_inode(sc, imap) ((void)0) + #define xrep_revalidate_allocbt (NULL) #define xrep_revalidate_iallocbt (NULL) @@ -135,6 +163,7 @@ xrep_setup_nothing( #define xrep_allocbt xrep_notsupported #define xrep_iallocbt xrep_notsupported #define xrep_refcountbt xrep_notsupported +#define xrep_inode xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 41a1d89ae8e6..d509a08d3fc3 
100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -32,6 +32,10 @@ xchk_setup_rtbitmap( if (error) return error; + error = xchk_ino_dqattach(sc); + if (error) + return error; + xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); return 0; } diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 8b15c47408d0..f94800a029f3 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -63,6 +63,10 @@ xchk_setup_rtsummary( if (error) return error; + error = xchk_ino_dqattach(sc); + if (error) + return error; + /* * Locking order requires us to take the rtbitmap first. We must be * careful to unlock it ourselves when we are done with the rtbitmap diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 6ff4dc57095f..7e903a0fde6c 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -282,7 +282,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_inode, .scrub = xchk_inode, - .repair = xrep_notsupported, + .repair = xrep_inode, }, [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ .type = ST_INODE, diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index 38708fb9a5d7..60643d791d4a 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -12,8 +12,10 @@ #include "xfs_log_format.h" #include "xfs_inode.h" #include "xfs_symlink.h" +#include "xfs_health.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/health.h" /* Set us up to scrub a symbolic link. */ int @@ -41,13 +43,19 @@ xchk_symlink( if (!S_ISLNK(VFS_I(ip)->i_mode)) return -ENOENT; + + if (xchk_file_looks_zapped(sc, XFS_SICK_INO_SYMLINK_ZAPPED)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); len = ip->i_disk_size; /* Plausible size? */ if (len > XFS_SYMLINK_MAXLEN || len <= 0) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); - goto out; + return 0; } /* Inline symlink? 
*/ @@ -55,15 +63,17 @@ xchk_symlink( if (len > xfs_inode_data_fork_size(ip) || len > strnlen(ifp->if_u1.if_data, xfs_inode_data_fork_size(ip))) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); - goto out; + return 0; } /* Remote symlink; must read the contents. */ error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf); if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) - goto out; + return error; if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); -out: - return error; + + /* If a remote symlink is clean, it is clearly not zapped. */ + xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_SYMLINK_ZAPPED); + return 0; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 3f7af4430951..120faa4dce2d 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1393,6 +1393,177 @@ DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_file_blocks); DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_free_blocks); DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_claim_block); +DECLARE_EVENT_CLASS(xrep_dinode_class, + TP_PROTO(struct xfs_scrub *sc, struct xfs_dinode *dip), + TP_ARGS(sc, dip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(uint16_t, mode) + __field(uint8_t, version) + __field(uint8_t, format) + __field(uint32_t, uid) + __field(uint32_t, gid) + __field(uint64_t, size) + __field(uint64_t, nblocks) + __field(uint32_t, extsize) + __field(uint32_t, nextents) + __field(uint16_t, anextents) + __field(uint8_t, forkoff) + __field(uint8_t, aformat) + __field(uint16_t, flags) + __field(uint32_t, gen) + __field(uint64_t, flags2) + __field(uint32_t, cowextsize) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->sm->sm_ino; + __entry->mode = be16_to_cpu(dip->di_mode); + __entry->version = dip->di_version; + __entry->format = dip->di_format; + __entry->uid = be32_to_cpu(dip->di_uid); + __entry->gid = be32_to_cpu(dip->di_gid); + __entry->size = be64_to_cpu(dip->di_size); + __entry->nblocks = 
be64_to_cpu(dip->di_nblocks); + __entry->extsize = be32_to_cpu(dip->di_extsize); + __entry->nextents = be32_to_cpu(dip->di_nextents); + __entry->anextents = be16_to_cpu(dip->di_anextents); + __entry->forkoff = dip->di_forkoff; + __entry->aformat = dip->di_aformat; + __entry->flags = be16_to_cpu(dip->di_flags); + __entry->gen = be32_to_cpu(dip->di_gen); + __entry->flags2 = be64_to_cpu(dip->di_flags2); + __entry->cowextsize = be32_to_cpu(dip->di_cowextsize); + ), + TP_printk("dev %d:%d ino 0x%llx mode 0x%x version %u format %u uid %u gid %u disize 0x%llx nblocks 0x%llx extsize %u nextents %u anextents %u forkoff 0x%x aformat %u flags 0x%x gen 0x%x flags2 0x%llx cowextsize %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->mode, + __entry->version, + __entry->format, + __entry->uid, + __entry->gid, + __entry->size, + __entry->nblocks, + __entry->extsize, + __entry->nextents, + __entry->anextents, + __entry->forkoff, + __entry->aformat, + __entry->flags, + __entry->gen, + __entry->flags2, + __entry->cowextsize) +) + +#define DEFINE_REPAIR_DINODE_EVENT(name) \ +DEFINE_EVENT(xrep_dinode_class, name, \ + TP_PROTO(struct xfs_scrub *sc, struct xfs_dinode *dip), \ + TP_ARGS(sc, dip)) +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_header); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_mode); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_flags); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_size); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_extsize_hints); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_symlink); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_dir); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_fixed); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_forks); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_dfork); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_zap_afork); +DEFINE_REPAIR_DINODE_EVENT(xrep_dinode_ensure_forkoff); + +DECLARE_EVENT_CLASS(xrep_inode_class, + TP_PROTO(struct xfs_scrub *sc), + TP_ARGS(sc), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + 
__field(xfs_fsize_t, size) + __field(xfs_rfsblock_t, nblocks) + __field(uint16_t, flags) + __field(uint64_t, flags2) + __field(uint32_t, nextents) + __field(uint8_t, format) + __field(uint32_t, anextents) + __field(uint8_t, aformat) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->sm->sm_ino; + __entry->size = sc->ip->i_disk_size; + __entry->nblocks = sc->ip->i_nblocks; + __entry->flags = sc->ip->i_diflags; + __entry->flags2 = sc->ip->i_diflags2; + __entry->nextents = sc->ip->i_df.if_nextents; + __entry->format = sc->ip->i_df.if_format; + __entry->anextents = sc->ip->i_af.if_nextents; + __entry->aformat = sc->ip->i_af.if_format; + ), + TP_printk("dev %d:%d ino 0x%llx disize 0x%llx nblocks 0x%llx flags 0x%x flags2 0x%llx nextents %u format %u anextents %u aformat %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->nblocks, + __entry->flags, + __entry->flags2, + __entry->nextents, + __entry->format, + __entry->anextents, + __entry->aformat) +) + +#define DEFINE_REPAIR_INODE_EVENT(name) \ +DEFINE_EVENT(xrep_inode_class, name, \ + TP_PROTO(struct xfs_scrub *sc), \ + TP_ARGS(sc)) +DEFINE_REPAIR_INODE_EVENT(xrep_inode_blockcounts); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_ids); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_flags); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_blockdir_size); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_sfdir_size); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_dir_size); +DEFINE_REPAIR_INODE_EVENT(xrep_inode_fixed); + +TRACE_EVENT(xrep_dinode_count_rmaps, + TP_PROTO(struct xfs_scrub *sc, xfs_rfsblock_t data_blocks, + xfs_rfsblock_t rt_blocks, xfs_rfsblock_t attr_blocks, + xfs_extnum_t data_extents, xfs_extnum_t rt_extents, + xfs_aextnum_t attr_extents), + TP_ARGS(sc, data_blocks, rt_blocks, attr_blocks, data_extents, + rt_extents, attr_extents), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_rfsblock_t, data_blocks) + __field(xfs_rfsblock_t, rt_blocks) + 
__field(xfs_rfsblock_t, attr_blocks) + __field(xfs_extnum_t, data_extents) + __field(xfs_extnum_t, rt_extents) + __field(xfs_aextnum_t, attr_extents) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->sm->sm_ino; + __entry->data_blocks = data_blocks; + __entry->rt_blocks = rt_blocks; + __entry->attr_blocks = attr_blocks; + __entry->data_extents = data_extents; + __entry->rt_extents = rt_extents; + __entry->attr_extents = attr_extents; + ), + TP_printk("dev %d:%d ino 0x%llx dblocks 0x%llx rtblocks 0x%llx ablocks 0x%llx dextents %llu rtextents %llu aextents %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->data_blocks, + __entry->rt_blocks, + __entry->attr_blocks, + __entry->data_extents, + __entry->rt_extents, + __entry->attr_extents) +); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 9f3ceb461515..57f42c2af0a3 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -18,6 +18,7 @@ #include "xfs_bmap.h" #include "xfs_trans.h" #include "xfs_error.h" +#include "xfs_health.h" /* * Directory file type support functions @@ -519,6 +520,8 @@ xfs_readdir( if (xfs_is_shutdown(dp->i_mount)) return -EIO; + if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) + return -EIO; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); ASSERT(xfs_isilocked(dp, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 72a075bb2c10..9a57afee9338 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -222,7 +222,7 @@ xfs_inode_mark_sick( struct xfs_inode *ip, unsigned int mask) { - ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); + ASSERT(!(mask & ~(XFS_SICK_INO_PRIMARY | XFS_SICK_INO_ZAPPED))); trace_xfs_inode_mark_sick(ip, mask); spin_lock(&ip->i_flags_lock); @@ -246,7 +246,7 @@ xfs_inode_mark_healthy( struct xfs_inode *ip, unsigned int mask) { - ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); 
+ ASSERT(!(mask & ~(XFS_SICK_INO_PRIMARY | XFS_SICK_INO_ZAPPED))); trace_xfs_inode_mark_healthy(ip, mask); spin_lock(&ip->i_flags_lock); @@ -369,6 +369,10 @@ static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_XATTR, XFS_BS_SICK_XATTR }, { XFS_SICK_INO_SYMLINK, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_PARENT, XFS_BS_SICK_PARENT }, + { XFS_SICK_INO_BMBTD_ZAPPED, XFS_BS_SICK_BMBTD }, + { XFS_SICK_INO_BMBTA_ZAPPED, XFS_BS_SICK_BMBTA }, + { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR }, + { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, { 0, 0 }, }; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c0f1c89786c2..ea6b277485a4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -37,6 +37,7 @@ #include "xfs_reflink.h" #include "xfs_ag.h" #include "xfs_log_priv.h" +#include "xfs_health.h" struct kmem_cache *xfs_inode_cache; @@ -661,6 +662,8 @@ xfs_lookup( if (xfs_is_shutdown(dp->i_mount)) return -EIO; + if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) + return -EIO; error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); if (error) @@ -978,6 +981,8 @@ xfs_create( if (xfs_is_shutdown(mp)) return -EIO; + if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) + return -EIO; prid = xfs_get_initial_prid(dp); @@ -1217,6 +1222,8 @@ xfs_link( if (xfs_is_shutdown(mp)) return -EIO; + if (xfs_ifork_zapped(tdp, XFS_DATA_FORK)) + return -EIO; error = xfs_qm_dqattach(sip); if (error) @@ -2506,6 +2513,8 @@ xfs_remove( if (xfs_is_shutdown(mp)) return -EIO; + if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) + return -EIO; error = xfs_qm_dqattach(dp); if (error) @@ -3758,3 +3767,29 @@ xfs_inode_reload_unlinked( return error; } + +/* Has this inode fork been zapped by repair? 
*/ +bool +xfs_ifork_zapped( + const struct xfs_inode *ip, + int whichfork) +{ + unsigned int datamask = 0; + + switch (whichfork) { + case XFS_DATA_FORK: + switch (ip->i_vnode.i_mode & S_IFMT) { + case S_IFDIR: + datamask = XFS_SICK_INO_DIR_ZAPPED; + break; + case S_IFLNK: + datamask = XFS_SICK_INO_SYMLINK_ZAPPED; + break; + } + return ip->i_sick & (XFS_SICK_INO_BMBTD_ZAPPED | datamask); + case XFS_ATTR_FORK: + return ip->i_sick & XFS_SICK_INO_BMBTA_ZAPPED; + default: + return false; + } +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3beb470f1892..97f63bacd4c2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -622,4 +622,6 @@ xfs_inode_unlinked_incomplete( int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); int xfs_inode_reload_unlinked(struct xfs_inode *ip); +bool xfs_ifork_zapped(const struct xfs_inode *ip, int whichfork); + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 85e433df6a3f..7c713727f7fd 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -23,6 +23,7 @@ #include "xfs_trans.h" #include "xfs_ialloc.h" #include "xfs_error.h" +#include "xfs_health.h" /* ----- Kernel only functions below ----- */ int @@ -108,6 +109,8 @@ xfs_readlink( if (xfs_is_shutdown(mp)) return -EIO; + if (xfs_ifork_zapped(ip, XFS_DATA_FORK)) + return -EIO; xfs_ilock(ip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 987843f84d03..364104e1b38a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -136,6 +136,9 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, }; int error; + if (xfs_ifork_zapped(XFS_I(inode), XFS_ATTR_FORK)) + return -EIO; + error = xfs_attr_get(&args); if (error) return error; @@ -294,6 +297,9 @@ xfs_vn_listxattr( struct inode *inode = d_inode(dentry); int error; + if (xfs_ifork_zapped(XFS_I(inode), XFS_ATTR_FORK)) + return -EIO; + /* * First read the regular on-disk attributes. */ |