aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChandan Babu R <[email protected]>2024-02-24 09:58:28 +0530
committerChandan Babu R <[email protected]>2024-02-24 09:58:28 +0530
commit5d1bd19d8305e6e2faf90d9febd51eaf60698e01 (patch)
treeae58915d05428b70e9b0f64e1966e9a5daf28234
parentf10775795302a592e97ae6427e449ed220d5ad03 (diff)
parent4ed080cd7cb077bbb4b64f0712be1618c9d55a0d (diff)
Merge tag 'repair-fscounters-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC
xfs: online repair for fs summary counters A longstanding deficiency in the online fs summary counter scrubbing code is that it hasn't any means to quiesce the incore percpu counters while it's running. There is no way to coordinate with other threads are reserving or freeing free space simultaneously, which leads to false error reports. Right now, if the discrepancy is large, we just sort of shrug and bail out with an incomplete flag, but this is lame. For repair activity, we actually /do/ need to stabilize the counters to get an accurate reading and install it in the percpu counter. To improve the former and enable the latter, allow the fscounters online fsck code to perform an exclusive mini-freeze on the filesystem. The exclusivity prevents userspace from thawing while we're running, and the mini-freeze means that we don't wait for the log to quiesce, which will make both speedier. Signed-off-by: Darrick J. Wong <[email protected]> Signed-off-by: Chandan Babu R <[email protected]> * tag 'repair-fscounters-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: repair summary counters
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/fscounters.c27
-rw-r--r--fs/xfs/scrub/fscounters.h20
-rw-r--r--fs/xfs/scrub/fscounters_repair.c72
-rw-r--r--fs/xfs/scrub/repair.h2
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h21
8 files changed, 128 insertions, 18 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 253744092915..ba8608f469ac 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -191,6 +191,7 @@ xfs-y += $(addprefix scrub/, \
alloc_repair.o \
bmap_repair.o \
cow_repair.o \
+ fscounters_repair.o \
ialloc_repair.o \
inode_repair.o \
newbt.o \
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 893c5a6e3ddb..d310737c8823 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -22,6 +22,7 @@
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
+#include "scrub/fscounters.h"
/*
* FS Summary Counters
@@ -48,17 +49,6 @@
* our tolerance for mismatch between expected and actual counter values.
*/
-struct xchk_fscounters {
- struct xfs_scrub *sc;
- uint64_t icount;
- uint64_t ifree;
- uint64_t fdblocks;
- uint64_t frextents;
- unsigned long long icount_min;
- unsigned long long icount_max;
- bool frozen;
-};
-
/*
* Since the expected value computation is lockless but only browses incore
* values, the percpu counters should be fairly close to each other. However,
@@ -235,8 +225,13 @@ xchk_setup_fscounters(
* Pause all writer activity in the filesystem while we're scrubbing to
* reduce the likelihood of background perturbations to the counters
* throwing off our calculations.
+ *
+ * If we're repairing, we need to prevent any other thread from
+ * changing the global fs summary counters while we're repairing them.
+ * This requires the fs to be frozen, which will disable background
+ * reclaim and purge all inactive inodes.
*/
- if (sc->flags & XCHK_TRY_HARDER) {
+ if ((sc->flags & XCHK_TRY_HARDER) || xchk_could_repair(sc)) {
error = xchk_fscounters_freeze(sc);
if (error)
return error;
@@ -254,7 +249,9 @@ xchk_setup_fscounters(
* set the INCOMPLETE flag even when a negative errno is returned. This care
* must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
* ECANCELED) that are absorbed into a scrub state flag update by
- * xchk_*_process_error.
+ * xchk_*_process_error. Scrub and repair share the same incore data
+ * structures, so the INCOMPLETE flag is critical to prevent a repair based on
+ * insufficient information.
*/
/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
@@ -482,6 +479,10 @@ xchk_fscount_within_range(
if (curr_value == expected)
return true;
+ /* We require exact matches when repair is running. */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ return false;
+
min_value = min(old_value, curr_value);
max_value = max(old_value, curr_value);
diff --git a/fs/xfs/scrub/fscounters.h b/fs/xfs/scrub/fscounters.h
new file mode 100644
index 000000000000..461a13d25f4b
--- /dev/null
+++ b/fs/xfs/scrub/fscounters.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#ifndef __XFS_SCRUB_FSCOUNTERS_H__
+#define __XFS_SCRUB_FSCOUNTERS_H__
+
+struct xchk_fscounters {
+ struct xfs_scrub *sc;
+ uint64_t icount;
+ uint64_t ifree;
+ uint64_t fdblocks;
+ uint64_t frextents;
+ unsigned long long icount_min;
+ unsigned long long icount_max;
+ bool frozen;
+};
+
+#endif /* __XFS_SCRUB_FSCOUNTERS_H__ */
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
new file mode 100644
index 000000000000..94cdb852bee4
--- /dev/null
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_health.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/fscounters.h"
+
+/*
+ * FS Summary Counters
+ * ===================
+ *
+ * We correct errors in the filesystem summary counters by setting them to the
+ * values computed during the obligatory scrub phase. However, we must be
+ * careful not to allow any other thread to change the counters while we're
+ * computing and setting new values. To achieve this, we freeze the
+ * filesystem for the whole operation if the REPAIR flag is set. The checking
+ * function is stricter when we've frozen the fs.
+ */
+
+/*
+ * Reset the superblock counters. Caller is responsible for freezing the
+ * filesystem during the calculation and reset phases.
+ */
+int
+xrep_fscounters(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xchk_fscounters *fsc = sc->buf;
+
+ /*
+ * Reinitialize the in-core counters from what we computed. We froze
+ * the filesystem, so there shouldn't be anyone else trying to modify
+ * these counters.
+ */
+ if (!fsc->frozen) {
+ ASSERT(fsc->frozen);
+ return -EFSCORRUPTED;
+ }
+
+ trace_xrep_reset_counters(mp, fsc);
+
+ percpu_counter_set(&mp->m_icount, fsc->icount);
+ percpu_counter_set(&mp->m_ifree, fsc->ifree);
+ percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+ percpu_counter_set(&mp->m_frextents, fsc->frextents);
+ mp->m_sb.sb_frextents = fsc->frextents;
+
+ return 0;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 8edac0150e96..2ff2bb79c540 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -117,6 +117,7 @@ int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc);
int xrep_nlinks(struct xfs_scrub *sc);
+int xrep_fscounters(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
@@ -198,6 +199,7 @@ xrep_setup_nothing(
#define xrep_quota xrep_notsupported
#define xrep_quotacheck xrep_notsupported
#define xrep_nlinks xrep_notsupported
+#define xrep_fscounters xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 0f23b7f36d4a..aeac9cae4ad4 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -364,7 +364,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_FS,
.setup = xchk_setup_fscounters,
.scrub = xchk_fscounters,
- .repair = xrep_notsupported,
+ .repair = xrep_fscounters,
},
[XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
.type = ST_FS,
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 2d5a330afe10..b8f3795f7d9b 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -24,6 +24,7 @@
#include "scrub/quota.h"
#include "scrub/iscan.h"
#include "scrub/nlinks.h"
+#include "scrub/fscounters.h"
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b4e65f148e7b..1e448d0c5aee 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -24,6 +24,7 @@ struct xfarray_sortinfo;
struct xchk_dqiter;
struct xchk_iscan;
struct xchk_nlink;
+struct xchk_fscounters;
/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
@@ -1804,16 +1805,28 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize,
__entry->refcbt_sz)
)
TRACE_EVENT(xrep_reset_counters,
- TP_PROTO(struct xfs_mount *mp),
- TP_ARGS(mp),
+ TP_PROTO(struct xfs_mount *mp, struct xchk_fscounters *fsc),
+ TP_ARGS(mp, fsc),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(uint64_t, icount)
+ __field(uint64_t, ifree)
+ __field(uint64_t, fdblocks)
+ __field(uint64_t, frextents)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
+ __entry->icount = fsc->icount;
+ __entry->ifree = fsc->ifree;
+ __entry->fdblocks = fsc->fdblocks;
+ __entry->frextents = fsc->frextents;
),
- TP_printk("dev %d:%d",
- MAJOR(__entry->dev), MINOR(__entry->dev))
+ TP_printk("dev %d:%d icount %llu ifree %llu fdblocks %llu frextents %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->icount,
+ __entry->ifree,
+ __entry->fdblocks,
+ __entry->frextents)
)
DECLARE_EVENT_CLASS(xrep_newbt_extent_class,