diff options
Diffstat (limited to 'fs/xfs/scrub/nlinks.c')
| -rw-r--r-- | fs/xfs/scrub/nlinks.c | 930 | 
1 files changed, 930 insertions, 0 deletions
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c new file mode 100644 index 000000000000..8a7d9557897c --- /dev/null +++ b/fs/xfs/scrub/nlinks.c @@ -0,0 +1,930 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2021-2024 Oracle.  All Rights Reserved. + * Author: Darrick J. Wong <[email protected]> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_iwalk.h" +#include "xfs_ialloc.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ag.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/repair.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/iscan.h" +#include "scrub/nlinks.h" +#include "scrub/trace.h" +#include "scrub/readdir.h" + +/* + * Live Inode Link Count Checking + * ============================== + * + * Inode link counts are "summary" metadata, in the sense that they are + * computed as the number of directory entries referencing each file on the + * filesystem.  Therefore, we compute the correct link counts by creating a + * shadow link count structure and walking every inode. + */ + +/* Set us up to scrub inode link counts. */ +int +xchk_setup_nlinks( +	struct xfs_scrub	*sc) +{ +	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); + +	sc->buf = kzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); +	if (!sc->buf) +		return -ENOMEM; + +	return xchk_setup_fs(sc); +} + +/* + * Part 1: Collecting file link counts.  For each file, we create a shadow link + * counting structure, then walk the entire directory tree, incrementing parent + * and child link counts for each directory entry seen. + * + * To avoid false corruption reports in part 2, any failure in this part must + * set the INCOMPLETE flag even when a negative errno is returned.  This care + * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, + * ECANCELED) that are absorbed into a scrub state flag update by + * xchk_*_process_error.  Scrub and repair share the same incore data + * structures, so the INCOMPLETE flag is critical to prevent a repair based on + * insufficient information. + * + * Because we are scanning a live filesystem, it's possible that another thread + * will try to update the link counts for an inode that we've already scanned. + * This will cause our counts to be incorrect.  Therefore, we hook all + * directory entry updates because that is when link count updates occur.  By + * shadowing transaction updates in this manner, live nlink check can ensure by + * locking the inode and the shadow structure that its own copies are not out + * of date.  Because the hook code runs in a different process context from the + * scrub code and the scrub state flags are not accessed atomically, failures + * in the hook code must abort the iscan and the scrubber must notice the + * aborted scan and set the incomplete flag. + * + * Note that we use jump labels and srcu notifier hooks to minimize the + * overhead when live nlinks is /not/ running.  Locking order for nlink + * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock. + */ + +/* + * Add a delta to an nlink counter, clamping the value to U32_MAX.  Because + * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results + * even if we lose some precision. + */ +static inline void +careful_add( +	xfs_nlink_t	*nlinkp, +	int		delta) +{ +	uint64_t	new_value = (uint64_t)(*nlinkp) + delta; + +	BUILD_BUG_ON(XFS_MAXLINK > U32_MAX); +	*nlinkp = min_t(uint64_t, new_value, U32_MAX); +} + +/* Update incore link count information.  Caller must hold the nlinks lock. */ +STATIC int +xchk_nlinks_update_incore( +	struct xchk_nlink_ctrs	*xnc, +	xfs_ino_t		ino, +	int			parents_delta, +	int			backrefs_delta, +	int			children_delta) +{ +	struct xchk_nlink	nl; +	int			error; + +	if (!xnc->nlinks) +		return 0; + +	error = xfarray_load_sparse(xnc->nlinks, ino, &nl); +	if (error) +		return error; + +	trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta, +			backrefs_delta, children_delta); + +	careful_add(&nl.parents, parents_delta); +	careful_add(&nl.backrefs, backrefs_delta); +	careful_add(&nl.children, children_delta); + +	nl.flags |= XCHK_NLINK_WRITTEN; +	error = xfarray_store(xnc->nlinks, ino, &nl); +	if (error == -EFBIG) { +		/* +		 * EFBIG means we tried to store data at too high a byte offset +		 * in the sparse array.  IOWs, we cannot complete the check and +		 * must notify userspace that the check was incomplete. +		 */ +		error = -ECANCELED; +	} +	return error; +} + +/* + * Apply a link count change from the regular filesystem into our shadow link + * count structure based on a directory update in progress. + */ +STATIC int +xchk_nlinks_live_update( +	struct notifier_block		*nb, +	unsigned long			action, +	void				*data) +{ +	struct xfs_dir_update_params	*p = data; +	struct xchk_nlink_ctrs		*xnc; +	int				error; + +	xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); + +	trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, +			p->delta, p->name->name, p->name->len); + +	/* +	 * If we've already scanned @dp, update the number of parents that link +	 * to @ip.  If @ip is a subdirectory, update the number of child links +	 * going out of @dp. +	 */ +	if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { +		mutex_lock(&xnc->lock); +		error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, +				0, 0); +		if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) +			error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, +					0, p->delta); +		mutex_unlock(&xnc->lock); +		if (error) +			goto out_abort; +	} + +	/* +	 * If @ip is a subdirectory and we've already scanned it, update the +	 * number of backrefs pointing to @dp. +	 */ +	if (S_ISDIR(VFS_IC(p->ip)->i_mode) && +	    xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { +		mutex_lock(&xnc->lock); +		error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, +				p->delta, 0); +		mutex_unlock(&xnc->lock); +		if (error) +			goto out_abort; +	} + +	return NOTIFY_DONE; + +out_abort: +	xchk_iscan_abort(&xnc->collect_iscan); +	return NOTIFY_DONE; +} + +/* Bump the observed link count for the inode referenced by this entry. */ +STATIC int +xchk_nlinks_collect_dirent( +	struct xfs_scrub	*sc, +	struct xfs_inode	*dp, +	xfs_dir2_dataptr_t	dapos, +	const struct xfs_name	*name, +	xfs_ino_t		ino, +	void			*priv) +{ +	struct xchk_nlink_ctrs	*xnc = priv; +	bool			dot = false, dotdot = false; +	int			error; + +	/* Does this name make sense? */ +	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) { +		error = -ECANCELED; +		goto out_abort; +	} + +	if (name->len == 1 && name->name[0] == '.') +		dot = true; +	else if (name->len == 2 && name->name[0] == '.' && +				   name->name[1] == '.') +		dotdot = true; + +	/* Don't accept a '.' entry that points somewhere else. */ +	if (dot && ino != dp->i_ino) { +		error = -ECANCELED; +		goto out_abort; +	} + +	/* Don't accept an invalid inode number. */ +	if (!xfs_verify_dir_ino(sc->mp, ino)) { +		error = -ECANCELED; +		goto out_abort; +	} + +	/* Update the shadow link counts if we haven't already failed. */ + +	if (xchk_iscan_aborted(&xnc->collect_iscan)) { +		error = -ECANCELED; +		goto out_incomplete; +	} + +	trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name); + +	mutex_lock(&xnc->lock); + +	/* +	 * If this is a dotdot entry, it is a back link from dp to ino.  How +	 * we handle this depends on whether or not dp is the root directory. +	 * +	 * The root directory is its own parent, so we pretend the dotdot entry +	 * establishes the "parent" of the root directory.  Increment the +	 * number of parents of the root directory. +	 * +	 * Otherwise, increment the number of backrefs pointing back to ino. +	 */ +	if (dotdot) { +		if (dp == sc->mp->m_rootip) +			error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); +		else +			error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); +		if (error) +			goto out_unlock; +	} + +	/* +	 * If this dirent is a forward link from dp to ino, increment the +	 * number of parents linking into ino. +	 */ +	if (!dot && !dotdot) { +		error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); +		if (error) +			goto out_unlock; +	} + +	/* +	 * If this dirent is a forward link to a subdirectory, increment the +	 * number of child links of dp. +	 */ +	if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) { +		error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1); +		if (error) +			goto out_unlock; +	} + +	mutex_unlock(&xnc->lock); +	return 0; + +out_unlock: +	mutex_unlock(&xnc->lock); +out_abort: +	xchk_iscan_abort(&xnc->collect_iscan); +out_incomplete: +	xchk_set_incomplete(sc); +	return error; +} + +/* Walk a directory to bump the observed link counts of the children. */ +STATIC int +xchk_nlinks_collect_dir( +	struct xchk_nlink_ctrs	*xnc, +	struct xfs_inode	*dp) +{ +	struct xfs_scrub	*sc = xnc->sc; +	unsigned int		lock_mode; +	int			error = 0; + +	/* Prevent anyone from changing this directory while we walk it. */ +	xfs_ilock(dp, XFS_IOLOCK_SHARED); +	lock_mode = xfs_ilock_data_map_shared(dp); + +	/* +	 * The dotdot entry of an unlinked directory still points to the last +	 * parent, but the parent no longer links to this directory.  Skip the +	 * directory to avoid overcounting. +	 */ +	if (VFS_I(dp)->i_nlink == 0) +		goto out_unlock; + +	/* +	 * We cannot count file links if the directory looks as though it has +	 * been zapped by the inode record repair code. +	 */ +	if (xchk_dir_looks_zapped(dp)) { +		error = -EBUSY; +		goto out_abort; +	} + +	error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc); +	if (error == -ECANCELED) { +		error = 0; +		goto out_unlock; +	} +	if (error) +		goto out_abort; + +	xchk_iscan_mark_visited(&xnc->collect_iscan, dp); +	goto out_unlock; + +out_abort: +	xchk_set_incomplete(sc); +	xchk_iscan_abort(&xnc->collect_iscan); +out_unlock: +	xfs_iunlock(dp, lock_mode); +	xfs_iunlock(dp, XFS_IOLOCK_SHARED); +	return error; +} + +/* If this looks like a valid pointer, count it. */ +static inline int +xchk_nlinks_collect_metafile( +	struct xchk_nlink_ctrs	*xnc, +	xfs_ino_t		ino) +{ +	if (!xfs_verify_ino(xnc->sc->mp, ino)) +		return 0; + +	trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino); +	return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); +} + +/* Bump the link counts of metadata files rooted in the superblock. */ +STATIC int +xchk_nlinks_collect_metafiles( +	struct xchk_nlink_ctrs	*xnc) +{ +	struct xfs_mount	*mp = xnc->sc->mp; +	int			error = -ECANCELED; + + +	if (xchk_iscan_aborted(&xnc->collect_iscan)) +		goto out_incomplete; + +	mutex_lock(&xnc->lock); +	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino); +	if (error) +		goto out_abort; + +	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino); +	if (error) +		goto out_abort; + +	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino); +	if (error) +		goto out_abort; + +	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino); +	if (error) +		goto out_abort; + +	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino); +	if (error) +		goto out_abort; +	mutex_unlock(&xnc->lock); + +	return 0; + +out_abort: +	mutex_unlock(&xnc->lock); +	xchk_iscan_abort(&xnc->collect_iscan); +out_incomplete: +	xchk_set_incomplete(xnc->sc); +	return error; +} + +/* Advance the collection scan cursor for this non-directory file. */ +static inline int +xchk_nlinks_collect_file( +	struct xchk_nlink_ctrs	*xnc, +	struct xfs_inode	*ip) +{ +	xfs_ilock(ip, XFS_IOLOCK_SHARED); +	xchk_iscan_mark_visited(&xnc->collect_iscan, ip); +	xfs_iunlock(ip, XFS_IOLOCK_SHARED); +	return 0; +} + +/* Walk all directories and count inode links. */ +STATIC int +xchk_nlinks_collect( +	struct xchk_nlink_ctrs	*xnc) +{ +	struct xfs_scrub	*sc = xnc->sc; +	struct xfs_inode	*ip; +	int			error; + +	/* Count the rt and quota files that are rooted in the superblock. */ +	error = xchk_nlinks_collect_metafiles(xnc); +	if (error) +		return error; + +	/* +	 * Set up for a potentially lengthy filesystem scan by reducing our +	 * transaction resource usage for the duration.  Specifically: +	 * +	 * Cancel the transaction to release the log grant space while we scan +	 * the filesystem. +	 * +	 * Create a new empty transaction to eliminate the possibility of the +	 * inode scan deadlocking on cyclical metadata. +	 * +	 * We pass the empty transaction to the file scanning function to avoid +	 * repeatedly cycling empty transactions.  This can be done even though +	 * we take the IOLOCK to quiesce the file because empty transactions +	 * do not take sb_internal. +	 */ +	xchk_trans_cancel(sc); +	error = xchk_trans_alloc_empty(sc); +	if (error) +		return error; + +	while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { +		if (S_ISDIR(VFS_I(ip)->i_mode)) +			error = xchk_nlinks_collect_dir(xnc, ip); +		else +			error = xchk_nlinks_collect_file(xnc, ip); +		xchk_irele(sc, ip); +		if (error) +			break; + +		if (xchk_should_terminate(sc, &error)) +			break; +	} +	xchk_iscan_iter_finish(&xnc->collect_iscan); +	if (error) { +		xchk_set_incomplete(sc); +		/* +		 * If we couldn't grab an inode that was busy with a state +		 * change, change the error code so that we exit to userspace +		 * as quickly as possible. +		 */ +		if (error == -EBUSY) +			return -ECANCELED; +		return error; +	} + +	/* +	 * Switch out for a real transaction in preparation for building a new +	 * tree. +	 */ +	xchk_trans_cancel(sc); +	return xchk_setup_fs(sc); +} + +/* + * Part 2: Comparing file link counters.  Walk each inode and compare the link + * counts against our shadow information; and then walk each shadow link count + * structure (that wasn't covered in the first part), comparing it against the + * file. + */ + +/* Read the observed link count for comparison with the actual inode. */ +STATIC int +xchk_nlinks_comparison_read( +	struct xchk_nlink_ctrs	*xnc, +	xfs_ino_t		ino, +	struct xchk_nlink	*obs) +{ +	struct xchk_nlink	nl; +	int			error; + +	error = xfarray_load_sparse(xnc->nlinks, ino, &nl); +	if (error) +		return error; + +	nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN); + +	error = xfarray_store(xnc->nlinks, ino, &nl); +	if (error == -EFBIG) { +		/* +		 * EFBIG means we tried to store data at too high a byte offset +		 * in the sparse array.  IOWs, we cannot complete the check and +		 * must notify userspace that the check was incomplete.  This +		 * shouldn't really happen outside of the collection phase. +		 */ +		xchk_set_incomplete(xnc->sc); +		return -ECANCELED; +	} +	if (error) +		return error; + +	/* Copy the counters, but do not expose the internal state. */ +	obs->parents = nl.parents; +	obs->backrefs = nl.backrefs; +	obs->children = nl.children; +	obs->flags = 0; +	return 0; +} + +/* Check our link count against an inode. */ +STATIC int +xchk_nlinks_compare_inode( +	struct xchk_nlink_ctrs	*xnc, +	struct xfs_inode	*ip) +{ +	struct xchk_nlink	obs; +	struct xfs_scrub	*sc = xnc->sc; +	uint64_t		total_links; +	unsigned int		actual_nlink; +	int			error; + +	xfs_ilock(ip, XFS_ILOCK_SHARED); +	mutex_lock(&xnc->lock); + +	if (xchk_iscan_aborted(&xnc->collect_iscan)) { +		xchk_set_incomplete(xnc->sc); +		error = -ECANCELED; +		goto out_scanlock; +	} + +	error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); +	if (error) +		goto out_scanlock; + +	/* +	 * If we don't have ftype to get an accurate count of the subdirectory +	 * entries in this directory, take advantage of the fact that on a +	 * consistent ftype=0 filesystem, the number of subdirectory +	 * backreferences (dotdot entries) pointing towards this directory +	 * should be equal to the number of subdirectory entries in the +	 * directory. +	 */ +	if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode)) +		obs.children = obs.backrefs; + +	total_links = xchk_nlink_total(ip, &obs); +	actual_nlink = VFS_I(ip)->i_nlink; + +	trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); + +	/* +	 * If we found so many parents that we'd overflow i_nlink, we must flag +	 * this as a corruption.  The VFS won't let users increase the link +	 * count, but it will let them decrease it. +	 */ +	if (total_links > XFS_MAXLINK) { +		xchk_ino_set_corrupt(sc, ip->i_ino); +		goto out_corrupt; +	} + +	/* Link counts should match. */ +	if (total_links != actual_nlink) { +		xchk_ino_set_corrupt(sc, ip->i_ino); +		goto out_corrupt; +	} + +	if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) { +		/* +		 * The collection phase ignores directories with zero link +		 * count, so we ignore them here too. +		 * +		 * The number of subdirectory backreferences (dotdot entries) +		 * pointing towards this directory should be equal to the +		 * number of subdirectory entries in the directory. +		 */ +		if (obs.children != obs.backrefs) +			xchk_ino_xref_set_corrupt(sc, ip->i_ino); +	} else { +		/* +		 * Non-directories and unlinked directories should not have +		 * back references. +		 */ +		if (obs.backrefs != 0) { +			xchk_ino_set_corrupt(sc, ip->i_ino); +			goto out_corrupt; +		} + +		/* +		 * Non-directories and unlinked directories should not have +		 * children. +		 */ +		if (obs.children != 0) { +			xchk_ino_set_corrupt(sc, ip->i_ino); +			goto out_corrupt; +		} +	} + +	if (ip == sc->mp->m_rootip) { +		/* +		 * For the root of a directory tree, both the '.' and '..' +		 * entries should point to the root directory.  The dotdot +		 * entry is counted as a parent of the root /and/ a backref of +		 * the root directory. +		 */ +		if (obs.parents != 1) { +			xchk_ino_set_corrupt(sc, ip->i_ino); +			goto out_corrupt; +		} +	} else if (actual_nlink > 0) { +		/* +		 * Linked files that are not the root directory should have at +		 * least one parent. +		 */ +		if (obs.parents == 0) { +			xchk_ino_set_corrupt(sc, ip->i_ino); +			goto out_corrupt; +		} +	} + +out_corrupt: +	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) +		error = -ECANCELED; +out_scanlock: +	mutex_unlock(&xnc->lock); +	xfs_iunlock(ip, XFS_ILOCK_SHARED); +	return error; +} + +/* + * Check our link count against an inode that wasn't checked previously.  This + * is intended to catch directories with dangling links, though we could be + * racing with inode allocation in other threads. + */ +STATIC int +xchk_nlinks_compare_inum( +	struct xchk_nlink_ctrs	*xnc, +	xfs_ino_t		ino) +{ +	struct xchk_nlink	obs; +	struct xfs_mount	*mp = xnc->sc->mp; +	struct xfs_trans	*tp = xnc->sc->tp; +	struct xfs_buf		*agi_bp; +	struct xfs_inode	*ip; +	int			error; + +	/* +	 * The first iget failed, so try again with the variant that returns +	 * either an incore inode or the AGI buffer.  If the function returns +	 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we +	 * can guarantee that the inode won't be allocated while we check for +	 * a zero link count in the observed link count data. +	 */ +	error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip); +	if (!error) { +		/* Actually got an inode, so use the inode compare. */ +		error = xchk_nlinks_compare_inode(xnc, ip); +		xchk_irele(xnc->sc, ip); +		return error; +	} +	if (error == -ENOENT || error == -EINVAL) { +		/* No inode was found.  Check for zero link count below. */ +		error = 0; +	} +	if (error) +		goto out_agi; + +	/* Ensure that we have protected against inode allocation/freeing. */ +	if (agi_bp == NULL) { +		ASSERT(agi_bp != NULL); +		xchk_set_incomplete(xnc->sc); +		return -ECANCELED; +	} + +	if (xchk_iscan_aborted(&xnc->collect_iscan)) { +		xchk_set_incomplete(xnc->sc); +		error = -ECANCELED; +		goto out_agi; +	} + +	mutex_lock(&xnc->lock); +	error = xchk_nlinks_comparison_read(xnc, ino, &obs); +	if (error) +		goto out_scanlock; + +	trace_xchk_nlinks_check_zero(mp, ino, &obs); + +	/* +	 * If we can't grab the inode, the link count had better be zero.  We +	 * still hold the AGI to prevent inode allocation/freeing. +	 */ +	if (xchk_nlink_total(NULL, &obs) != 0) { +		xchk_ino_set_corrupt(xnc->sc, ino); +		error = -ECANCELED; +	} + +out_scanlock: +	mutex_unlock(&xnc->lock); +out_agi: +	if (agi_bp) +		xfs_trans_brelse(tp, agi_bp); +	return error; +} + +/* + * Try to visit every inode in the filesystem to compare the link count.  Move + * on if we can't grab an inode, since we'll revisit unchecked nlink records in + * the second part. + */ +static int +xchk_nlinks_compare_iter( +	struct xchk_nlink_ctrs	*xnc, +	struct xfs_inode	**ipp) +{ +	int			error; + +	do { +		error = xchk_iscan_iter(&xnc->compare_iscan, ipp); +	} while (error == -EBUSY); + +	return error; +} + +/* Compare the link counts we observed against the live information. */ +STATIC int +xchk_nlinks_compare( +	struct xchk_nlink_ctrs	*xnc) +{ +	struct xchk_nlink	nl; +	struct xfs_scrub	*sc = xnc->sc; +	struct xfs_inode	*ip; +	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT; +	int			error; + +	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) +		return 0; + +	/* +	 * Create a new empty transaction so that we can advance the iscan +	 * cursor without deadlocking if the inobt has a cycle and push on the +	 * inactivation workqueue. +	 */ +	xchk_trans_cancel(sc); +	error = xchk_trans_alloc_empty(sc); +	if (error) +		return error; + +	/* +	 * Use the inobt to walk all allocated inodes to compare the link +	 * counts.  Inodes skipped by _compare_iter will be tried again in the +	 * next phase of the scan. +	 */ +	xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan); +	while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) { +		error = xchk_nlinks_compare_inode(xnc, ip); +		xchk_iscan_mark_visited(&xnc->compare_iscan, ip); +		xchk_irele(sc, ip); +		if (error) +			break; + +		if (xchk_should_terminate(sc, &error)) +			break; +	} +	xchk_iscan_iter_finish(&xnc->compare_iscan); +	xchk_iscan_teardown(&xnc->compare_iscan); +	if (error) +		return error; + +	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) +		return 0; + +	/* +	 * Walk all the non-null nlink observations that weren't checked in the +	 * previous step. +	 */ +	mutex_lock(&xnc->lock); +	while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) { +		xfs_ino_t	ino = cur - 1; + +		if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) +			continue; + +		mutex_unlock(&xnc->lock); + +		error = xchk_nlinks_compare_inum(xnc, ino); +		if (error) +			return error; + +		if (xchk_should_terminate(xnc->sc, &error)) +			return error; + +		mutex_lock(&xnc->lock); +	} +	mutex_unlock(&xnc->lock); + +	return error; +} + +/* Tear down everything associated with a nlinks check. */ +static void +xchk_nlinks_teardown_scan( +	void			*priv) +{ +	struct xchk_nlink_ctrs	*xnc = priv; + +	/* Discourage any hook functions that might be running. */ +	xchk_iscan_abort(&xnc->collect_iscan); + +	xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); + +	xfarray_destroy(xnc->nlinks); +	xnc->nlinks = NULL; + +	xchk_iscan_teardown(&xnc->collect_iscan); +	mutex_destroy(&xnc->lock); +	xnc->sc = NULL; +} + +/* + * Scan all inodes in the entire filesystem to generate link count data.  If + * the scan is successful, the counts will be left alive for a repair.  If any + * error occurs, we'll tear everything down. + */ +STATIC int +xchk_nlinks_setup_scan( +	struct xfs_scrub	*sc, +	struct xchk_nlink_ctrs	*xnc) +{ +	struct xfs_mount	*mp = sc->mp; +	char			*descr; +	unsigned long long	max_inos; +	xfs_agnumber_t		last_agno = mp->m_sb.sb_agcount - 1; +	xfs_agino_t		first_agino, last_agino; +	int			error; + +	ASSERT(xnc->sc == NULL); +	xnc->sc = sc; + +	mutex_init(&xnc->lock); + +	/* Retry iget every tenth of a second for up to 30 seconds. */ +	xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan); + +	/* +	 * Set up enough space to store an nlink record for the highest +	 * possible inode number in this system. +	 */ +	xfs_agino_range(mp, last_agno, &first_agino, &last_agino); +	max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; +	descr = xchk_xfile_descr(sc, "file link counts"); +	error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), +			sizeof(struct xchk_nlink), &xnc->nlinks); +	kfree(descr); +	if (error) +		goto out_teardown; + +	/* +	 * Hook into the directory entry code so that we can capture updates to +	 * file link counts.  The hook only triggers for inodes that were +	 * already scanned, and the scanner thread takes each inode's ILOCK, +	 * which means that any in-progress inode updates will finish before we +	 * can scan the inode. +	 */ +	ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); +	xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); +	error = xfs_dir_hook_add(mp, &xnc->dhook); +	if (error) +		goto out_teardown; + +	/* Use deferred cleanup to pass the inode link count data to repair. */ +	sc->buf_cleanup = xchk_nlinks_teardown_scan; +	return 0; + +out_teardown: +	xchk_nlinks_teardown_scan(xnc); +	return error; +} + +/* Scrub the link count of all inodes on the filesystem. */ +int +xchk_nlinks( +	struct xfs_scrub	*sc) +{ +	struct xchk_nlink_ctrs	*xnc = sc->buf; +	int			error = 0; + +	/* Set ourselves up to check link counts on the live filesystem. */ +	error = xchk_nlinks_setup_scan(sc, xnc); +	if (error) +		return error; + +	/* Walk all inodes, picking up link count information. */ +	error = xchk_nlinks_collect(xnc); +	if (!xchk_xref_process_error(sc, 0, 0, &error)) +		return error; + +	/* Fail fast if we're not playing with a full dataset. */ +	if (xchk_iscan_aborted(&xnc->collect_iscan)) +		xchk_set_incomplete(sc); +	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) +		return 0; + +	/* Compare link counts. */ +	error = xchk_nlinks_compare(xnc); +	if (!xchk_xref_process_error(sc, 0, 0, &error)) +		return error; + +	/* Check one last time for an incomplete dataset. */ +	if (xchk_iscan_aborted(&xnc->collect_iscan)) +		xchk_set_incomplete(sc); + +	return 0; +}  |