 fs/xfs/Makefile                |   1
 fs/xfs/scrub/agheader_repair.c |  75
 fs/xfs/scrub/bitmap.c          |  78
 fs/xfs/scrub/bitmap.h          |  10
 fs/xfs/scrub/reap.c            | 498
 fs/xfs/scrub/reap.h            |  12
 fs/xfs/scrub/repair.c          | 366
 fs/xfs/scrub/repair.h          |  18
 fs/xfs/scrub/trace.h           |  72
 fs/xfs/xfs_buf.c               |   9
 fs/xfs/xfs_buf.h               |  13
 11 files changed, 673 insertions(+), 479 deletions(-)
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 16e4eb431230..0a5cebb9802b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -173,6 +173,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
+ reap.o \
repair.o \
)
endif
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index bbaa65422c4f..9e99486b5f20 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -26,6 +26,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
+#include "scrub/reap.h"
/* Superblock */
@@ -444,13 +445,13 @@ out_revert:
struct xrep_agfl {
/* Bitmap of alleged AGFL blocks that we're not going to add. */
- struct xbitmap crossed;
+ struct xagb_bitmap crossed;
/* Bitmap of other OWN_AG metadata blocks. */
- struct xbitmap agmetablocks;
+ struct xagb_bitmap agmetablocks;
/* Bitmap of free space. */
- struct xbitmap *freesp;
+ struct xagb_bitmap *freesp;
/* rmapbt cursor for finding crosslinked blocks */
struct xfs_btree_cur *rmap_cur;
@@ -466,7 +467,6 @@ xrep_agfl_walk_rmap(
void *priv)
{
struct xrep_agfl *ra = priv;
- xfs_fsblock_t fsb;
int error = 0;
if (xchk_should_terminate(ra->sc, &error))
@@ -474,14 +474,13 @@ xrep_agfl_walk_rmap(
/* Record all the OWN_AG blocks. */
if (rec->rm_owner == XFS_RMAP_OWN_AG) {
- fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- rec->rm_startblock);
- error = xbitmap_set(ra->freesp, fsb, rec->rm_blockcount);
+ error = xagb_bitmap_set(ra->freesp, rec->rm_startblock,
+ rec->rm_blockcount);
if (error)
return error;
}
- return xbitmap_set_btcur_path(&ra->agmetablocks, cur);
+ return xagb_bitmap_set_btcur_path(&ra->agmetablocks, cur);
}
/* Strike out the blocks that are cross-linked according to the rmapbt. */
@@ -492,12 +491,10 @@ xrep_agfl_check_extent(
void *priv)
{
struct xrep_agfl *ra = priv;
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start);
+ xfs_agblock_t agbno = start;
xfs_agblock_t last_agbno = agbno + len - 1;
int error;
- ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno);
-
while (agbno <= last_agbno) {
bool other_owners;
@@ -507,7 +504,7 @@ xrep_agfl_check_extent(
return error;
if (other_owners) {
- error = xbitmap_set(&ra->crossed, agbno, 1);
+ error = xagb_bitmap_set(&ra->crossed, agbno, 1);
if (error)
return error;
}
@@ -533,7 +530,7 @@ STATIC int
xrep_agfl_collect_blocks(
struct xfs_scrub *sc,
struct xfs_buf *agf_bp,
- struct xbitmap *agfl_extents,
+ struct xagb_bitmap *agfl_extents,
xfs_agblock_t *flcount)
{
struct xrep_agfl ra;
@@ -543,8 +540,8 @@ xrep_agfl_collect_blocks(
ra.sc = sc;
ra.freesp = agfl_extents;
- xbitmap_init(&ra.agmetablocks);
- xbitmap_init(&ra.crossed);
+ xagb_bitmap_init(&ra.agmetablocks);
+ xagb_bitmap_init(&ra.crossed);
/* Find all space used by the free space btrees & rmapbt. */
cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
@@ -556,7 +553,7 @@ xrep_agfl_collect_blocks(
/* Find all blocks currently being used by the bnobt. */
cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
sc->sa.pag, XFS_BTNUM_BNO);
- error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
+ error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur);
xfs_btree_del_cursor(cur, error);
if (error)
goto out_bmp;
@@ -564,7 +561,7 @@ xrep_agfl_collect_blocks(
/* Find all blocks currently being used by the cntbt. */
cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
sc->sa.pag, XFS_BTNUM_CNT);
- error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
+ error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur);
xfs_btree_del_cursor(cur, error);
if (error)
goto out_bmp;
@@ -573,17 +570,17 @@ xrep_agfl_collect_blocks(
* Drop the freesp meta blocks that are in use by btrees.
* The remaining blocks /should/ be AGFL blocks.
*/
- error = xbitmap_disunion(agfl_extents, &ra.agmetablocks);
+ error = xagb_bitmap_disunion(agfl_extents, &ra.agmetablocks);
if (error)
goto out_bmp;
/* Strike out the blocks that are cross-linked. */
ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
- error = xbitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra);
+ error = xagb_bitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra);
xfs_btree_del_cursor(ra.rmap_cur, error);
if (error)
goto out_bmp;
- error = xbitmap_disunion(agfl_extents, &ra.crossed);
+ error = xagb_bitmap_disunion(agfl_extents, &ra.crossed);
if (error)
goto out_bmp;
@@ -591,12 +588,12 @@ xrep_agfl_collect_blocks(
* Calculate the new AGFL size. If we found more blocks than fit in
* the AGFL we'll free them later.
*/
- *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents),
+ *flcount = min_t(uint64_t, xagb_bitmap_hweight(agfl_extents),
xfs_agfl_size(mp));
out_bmp:
- xbitmap_destroy(&ra.crossed);
- xbitmap_destroy(&ra.agmetablocks);
+ xagb_bitmap_destroy(&ra.crossed);
+ xagb_bitmap_destroy(&ra.agmetablocks);
return error;
}
@@ -626,7 +623,7 @@ xrep_agfl_update_agf(
}
struct xrep_agfl_fill {
- struct xbitmap used_extents;
+ struct xagb_bitmap used_extents;
struct xfs_scrub *sc;
__be32 *agfl_bno;
xfs_agblock_t flcount;
@@ -642,17 +639,15 @@ xrep_agfl_fill(
{
struct xrep_agfl_fill *af = priv;
struct xfs_scrub *sc = af->sc;
- xfs_fsblock_t fsbno = start;
+ xfs_agblock_t agbno = start;
int error;
- while (fsbno < start + len && af->fl_off < af->flcount)
- af->agfl_bno[af->fl_off++] =
- cpu_to_be32(XFS_FSB_TO_AGBNO(sc->mp, fsbno++));
+ trace_xrep_agfl_insert(sc->sa.pag, agbno, len);
- trace_xrep_agfl_insert(sc->mp, sc->sa.pag->pag_agno,
- XFS_FSB_TO_AGBNO(sc->mp, start), len);
+ while (agbno < start + len && af->fl_off < af->flcount)
+ af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++);
- error = xbitmap_set(&af->used_extents, start, fsbno - 1);
+ error = xagb_bitmap_set(&af->used_extents, start, agbno - 1);
if (error)
return error;
@@ -667,7 +662,7 @@ STATIC int
xrep_agfl_init_header(
struct xfs_scrub *sc,
struct xfs_buf *agfl_bp,
- struct xbitmap *agfl_extents,
+ struct xagb_bitmap *agfl_extents,
xfs_agblock_t flcount)
{
struct xrep_agfl_fill af = {
@@ -695,17 +690,17 @@ xrep_agfl_init_header(
* blocks than fit in the AGFL, they will be freed in a subsequent
* step.
*/
- xbitmap_init(&af.used_extents);
+ xagb_bitmap_init(&af.used_extents);
af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp);
- xbitmap_walk(agfl_extents, xrep_agfl_fill, &af);
- error = xbitmap_disunion(agfl_extents, &af.used_extents);
+ xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af);
+ error = xagb_bitmap_disunion(agfl_extents, &af.used_extents);
if (error)
return error;
/* Write new AGFL to disk. */
xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF);
xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1);
- xbitmap_destroy(&af.used_extents);
+ xagb_bitmap_destroy(&af.used_extents);
return 0;
}
@@ -714,7 +709,7 @@ int
xrep_agfl(
struct xfs_scrub *sc)
{
- struct xbitmap agfl_extents;
+ struct xagb_bitmap agfl_extents;
struct xfs_mount *mp = sc->mp;
struct xfs_buf *agf_bp;
struct xfs_buf *agfl_bp;
@@ -725,7 +720,7 @@ xrep_agfl(
if (!xfs_has_rmapbt(mp))
return -EOPNOTSUPP;
- xbitmap_init(&agfl_extents);
+ xagb_bitmap_init(&agfl_extents);
/*
* Read the AGF so that we can query the rmapbt. We hope that there's
@@ -774,10 +769,10 @@ xrep_agfl(
goto err;
/* Dump any AGFL overflow. */
- error = xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
+ error = xrep_reap_agblocks(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
XFS_AG_RESV_AGFL);
err:
- xbitmap_destroy(&agfl_extents);
+ xagb_bitmap_destroy(&agfl_extents);
return error;
}
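
For reference, a minimal sketch (not part of this patch) of the xagb_bitmap lifecycle that xrep_agfl() now follows. xrep_example_collect() and the literal extent are hypothetical; the xagb_bitmap_* and xrep_reap_agblocks() calls are the interfaces used above:

/* Sketch: collect candidate AG blocks, subtract in-use ones, reap the rest. */
STATIC int
xrep_example_collect(
	struct xfs_scrub	*sc)
{
	struct xagb_bitmap	candidates;
	struct xagb_bitmap	in_use;
	int			error;

	xagb_bitmap_init(&candidates);
	xagb_bitmap_init(&in_use);

	/* Extents are recorded in AG block units now, not xfs_fsblock_t. */
	error = xagb_bitmap_set(&candidates, 100, 4);	/* hypothetical run */
	if (error)
		goto out;

	/* Drop the blocks that other structures still own. */
	error = xagb_bitmap_disunion(&candidates, &in_use);
	if (error)
		goto out;

	/* Dispose of the remainder, as xrep_agfl() does for AGFL overflow. */
	error = xrep_reap_agblocks(sc, &candidates, &XFS_RMAP_OINFO_AG,
			XFS_AG_RESV_NONE);
out:
	xagb_bitmap_destroy(&in_use);
	xagb_bitmap_destroy(&candidates);
	return error;
}

Note the unit change: the old code tracked xfs_fsblock_t values and converted back and forth with XFS_AGB_TO_FSB/XFS_FSB_TO_AGBNO; the typed per-AG bitmap makes those conversions unnecessary.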
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index 0c959be396ea..e0c89a9a0ca0 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -301,21 +301,15 @@ xagb_bitmap_set_btblocks(
* blocks going from the leaf towards the root.
*/
int
-xbitmap_set_btcur_path(
- struct xbitmap *bitmap,
+xagb_bitmap_set_btcur_path(
+ struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur)
{
- struct xfs_buf *bp;
- xfs_fsblock_t fsb;
int i;
int error;
for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) {
- xfs_btree_get_block(cur, i, &bp);
- if (!bp)
- continue;
- fsb = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
- error = xbitmap_set(bitmap, fsb, 1);
+ error = xagb_bitmap_visit_btblock(cur, i, bitmap);
if (error)
return error;
}
@@ -323,35 +317,6 @@ xbitmap_set_btcur_path(
return 0;
}
-/* Collect a btree's block in the bitmap. */
-STATIC int
-xbitmap_collect_btblock(
- struct xfs_btree_cur *cur,
- int level,
- void *priv)
-{
- struct xbitmap *bitmap = priv;
- struct xfs_buf *bp;
- xfs_fsblock_t fsbno;
-
- xfs_btree_get_block(cur, level, &bp);
- if (!bp)
- return 0;
-
- fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
- return xbitmap_set(bitmap, fsbno, 1);
-}
-
-/* Walk the btree and mark the bitmap wherever a btree block is found. */
-int
-xbitmap_set_btblocks(
- struct xbitmap *bitmap,
- struct xfs_btree_cur *cur)
-{
- return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock,
- XFS_BTREE_VISIT_ALL, bitmap);
-}
-
/* How many bits are set in this bitmap? */
uint64_t
xbitmap_hweight(
@@ -385,43 +350,6 @@ xbitmap_walk(
return error;
}
-struct xbitmap_walk_bits {
- xbitmap_walk_bits_fn fn;
- void *priv;
-};
-
-/* Walk all the bits in a run. */
-static int
-xbitmap_walk_bits_in_run(
- uint64_t start,
- uint64_t len,
- void *priv)
-{
- struct xbitmap_walk_bits *wb = priv;
- uint64_t i;
- int error = 0;
-
- for (i = start; i < start + len; i++) {
- error = wb->fn(i, wb->priv);
- if (error)
- break;
- }
-
- return error;
-}
-
-/* Call a function for every set bit in this bitmap. */
-int
-xbitmap_walk_bits(
- struct xbitmap *bitmap,
- xbitmap_walk_bits_fn fn,
- void *priv)
-{
- struct xbitmap_walk_bits wb = {.fn = fn, .priv = priv};
-
- return xbitmap_walk(bitmap, xbitmap_walk_bits_in_run, &wb);
-}
-
/* Does this bitmap have no bits set at all? */
bool
xbitmap_empty(
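
xagb_bitmap_set_btcur_path() marks every block on the cursor's current path from the leaf toward the root, but only while bc_levels[i].ptr == 1, i.e. the first time a left-to-right record walk enters each block. A hedged usage sketch in the style of xrep_agfl_walk_rmap() above (the callback name is hypothetical):

/* Record the btree blocks backing the cursor's path during a query. */
static int
xagb_example_visit_rec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xagb_bitmap		*bitmap = priv;

	return xagb_bitmap_set_btcur_path(bitmap, cur);
}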
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index 84981724ecaf..4fe58bad6734 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -16,10 +16,6 @@ void xbitmap_destroy(struct xbitmap *bitmap);
int xbitmap_clear(struct xbitmap *bitmap, uint64_t start, uint64_t len);
int xbitmap_set(struct xbitmap *bitmap, uint64_t start, uint64_t len);
int xbitmap_disunion(struct xbitmap *bitmap, struct xbitmap *sub);
-int xbitmap_set_btcur_path(struct xbitmap *bitmap,
- struct xfs_btree_cur *cur);
-int xbitmap_set_btblocks(struct xbitmap *bitmap,
- struct xfs_btree_cur *cur);
uint64_t xbitmap_hweight(struct xbitmap *bitmap);
/*
@@ -33,10 +29,6 @@ typedef int (*xbitmap_walk_fn)(uint64_t start, uint64_t len, void *priv);
int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn,
void *priv);
-typedef int (*xbitmap_walk_bits_fn)(uint64_t bit, void *priv);
-int xbitmap_walk_bits(struct xbitmap *bitmap, xbitmap_walk_bits_fn fn,
- void *priv);
-
bool xbitmap_empty(struct xbitmap *bitmap);
bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len);
@@ -110,5 +102,7 @@ static inline int xagb_bitmap_walk(struct xagb_bitmap *bitmap,
int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur);
+int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap,
+ struct xfs_btree_cur *cur);
#endif /* __XFS_SCRUB_BITMAP_H__ */
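
The xagb_bitmap walkers reuse the xbitmap_walk_fn shape, with start and len now measured in AG blocks rather than generic 64-bit values. A hedged sketch of a walk callback (xagb_example_count() is hypothetical; compare xreap_agmeta_extent() in reap.c below):

/* Count the extents visited by xagb_bitmap_walk(); purely illustrative. */
static int
xagb_example_count(
	uint64_t		start,
	uint64_t		len,
	void			*priv)
{
	unsigned int		*nr = priv;

	(*nr)++;
	return 0;
}

A caller would pass it as xagb_bitmap_walk(bitmap, xagb_example_count, &nr); the walk stops early on the first nonzero return.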
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
new file mode 100644
index 000000000000..86a62420e02c
--- /dev/null
+++ b/fs/xfs/scrub/reap.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_extent_busy.h"
+#include "xfs_ag.h"
+#include "xfs_ag_resv.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_bmap.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_attr_remote.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/reap.h"
+
+/*
+ * Disposal of Blocks from Old Metadata
+ *
+ * Now that we've constructed a new btree to replace the damaged one, we want
+ * to dispose of the blocks that (we think) the old btree was using.
+ * Previously, we used the rmapbt to collect the extents (bitmap) with the
+ * rmap owner corresponding to the tree we rebuilt, collected extents for any
+ * blocks with the same rmap owner that are owned by another data structure
+ * (sublist), and subtracted sublist from bitmap. In theory the extents
+ * remaining in bitmap are the old btree's blocks.
+ *
+ * Unfortunately, it's possible that the btree was crosslinked with other
+ * blocks on disk. The rmap data can tell us if there are multiple owners, so
+ * if the rmapbt says there is an owner of this block other than @oinfo, then
+ * the block is crosslinked. Remove the reverse mapping and continue.
+ *
+ * If there is one rmap record, we can free the block, which removes the
+ * reverse mapping but doesn't add the block to the free space. Our repair
+ * strategy is to hope the other metadata objects crosslinked on this block
+ * will be rebuilt (atop different blocks), thereby removing all the cross
+ * links.
+ *
+ * If there are no rmap records at all, we also free the block. If the btree
+ * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
+ * supposed to be a rmap record and everything is ok. For other btrees there
+ * had to have been an rmap entry for the block to have ended up on @bitmap,
+ * so if it's gone now there's something wrong and the fs will shut down.
+ *
+ * Note: If there are multiple rmap records with only the same rmap owner as
+ * the btree we're trying to rebuild and the block is indeed owned by another
+ * data structure with the same rmap owner, then the block will be in sublist
+ * and therefore doesn't need disposal. If there are multiple rmap records
+ * with only the same rmap owner but the block is not owned by something with
+ * the same rmap owner, the block will be freed.
+ *
+ * The caller is responsible for locking the AG headers for the entire rebuild
+ * operation so that nothing else can sneak in and change the AG state while
+ * we're not looking. We must also invalidate any buffers associated with
+ * @bitmap.
+ */
+
+/* Information about reaping extents after a repair. */
+struct xreap_state {
+ struct xfs_scrub *sc;
+
+ /* Reverse mapping owner and metadata reservation type. */
+ const struct xfs_owner_info *oinfo;
+ enum xfs_ag_resv_type resv;
+
+ /* If true, roll the transaction before reaping the next extent. */
+ bool force_roll;
+
+ /* Number of deferred reaps attached to the current transaction. */
+ unsigned int deferred;
+
+ /* Number of invalidated buffers logged to the current transaction. */
+ unsigned int invalidated;
+
+ /* Number of deferred reaps queued during the whole reap sequence. */
+ unsigned long long total_deferred;
+};
+
+/* Put a block back on the AGFL. */
+STATIC int
+xreap_put_freelist(
+ struct xfs_scrub *sc,
+ xfs_agblock_t agbno)
+{
+ struct xfs_buf *agfl_bp;
+ int error;
+
+ /* Make sure there's space on the freelist. */
+ error = xrep_fix_freelist(sc, true);
+ if (error)
+ return error;
+
+ /*
+ * Since we're "freeing" a lost block onto the AGFL, we have to
+ * create an rmap for the block prior to merging it or else other
+ * parts will break.
+ */
+ error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
+ &XFS_RMAP_OINFO_AG);
+ if (error)
+ return error;
+
+ /* Put the block on the AGFL. */
+ error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
+ if (error)
+ return error;
+
+ error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
+ agfl_bp, agbno, 0);
+ if (error)
+ return error;
+ xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+
+ return 0;
+}
+
+/* Are there any uncommitted reap operations? */
+static inline bool xreap_dirty(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->deferred)
+ return true;
+ if (rs->invalidated)
+ return true;
+ if (rs->total_deferred)
+ return true;
+ return false;
+}
+
+#define XREAP_MAX_BINVAL (2048)
+
+/*
+ * Decide if we want to roll the transaction after reaping an extent. We don't
+ * want to overrun the transaction reservation, so we prohibit more than
+ * 128 EFIs per transaction. For the same reason, we limit the number
+ * of buffer invalidations to 2048.
+ */
+static inline bool xreap_want_roll(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
+ return true;
+ if (rs->invalidated > XREAP_MAX_BINVAL)
+ return true;
+ return false;
+}
+
+static inline void xreap_reset(struct xreap_state *rs)
+{
+ rs->total_deferred += rs->deferred;
+ rs->deferred = 0;
+ rs->invalidated = 0;
+ rs->force_roll = false;
+}
+
+#define XREAP_MAX_DEFER_CHAIN (2048)
+
+/*
+ * Decide if we want to finish the deferred ops that are attached to the scrub
+ * transaction. We don't want to queue huge chains of deferred ops because
+ * that can consume a lot of log space and kernel memory. Hence we trigger an
+ * xfs_defer_finish if there are more than 2048 deferred reap operations or the
+ * caller did some real work.
+ */
+static inline bool
+xreap_want_defer_finish(const struct xreap_state *rs)
+{
+ if (rs->force_roll)
+ return true;
+ if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
+ return true;
+ return false;
+}
+
+static inline void xreap_defer_finish_reset(struct xreap_state *rs)
+{
+ rs->total_deferred = 0;
+ rs->deferred = 0;
+ rs->invalidated = 0;
+ rs->force_roll = false;
+}
+
+/* Try to invalidate the incore buffers for an extent that we're freeing. */
+STATIC void
+xreap_agextent_binval(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_extlen_t *aglenp)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agnumber_t agno = sc->sa.pag->pag_agno;
+ xfs_agblock_t agbno_next = agbno + *aglenp;
+ xfs_agblock_t bno = agbno;
+
+ /*
+ * Avoid invalidating AG headers and post-EOFS blocks because we never
+ * own those.
+ */
+ if (!xfs_verify_agbno(pag, agbno) ||
+ !xfs_verify_agbno(pag, agbno_next - 1))
+ return;
+
+ /*
+ * If there are incore buffers for these blocks, invalidate them. We
+ * assume that the lack of any other known owners means that the buffer
+ * can be locked without risk of deadlocking. The buffer cache cannot
+ * detect aliasing, so employ nested loops to scan for incore buffers
+ * of any plausible size.
+ */
+ while (bno < agbno_next) {
+ xfs_agblock_t fsbcount;
+ xfs_agblock_t max_fsbs;
+
+ /*
+ * Max buffer size is the max remote xattr buffer size, which
+ * is one fs block larger than 64k.
+ */
+ max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
+ xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
+
+ for (fsbcount = 1; fsbcount < max_fsbs; fsbcount++) {
+ struct xfs_buf *bp = NULL;
+ xfs_daddr_t daddr;
+ int error;
+
+ daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
+ error = xfs_buf_incore(mp->m_ddev_targp, daddr,
+ XFS_FSB_TO_BB(mp, fsbcount),
+ XBF_LIVESCAN, &bp);
+ if (error)
+ continue;
+
+ xfs_trans_bjoin(sc->tp, bp);
+ xfs_trans_binval(sc->tp, bp);
+ rs->invalidated++;
+
+ /*
+ * Stop invalidating if we've hit the limit; we should
+ * still have enough reservation left to free however
+ * far we've gotten.
+ */
+ if (rs->invalidated > XREAP_MAX_BINVAL) {
+ *aglenp -= agbno_next - bno;
+ goto out;
+ }
+ }
+
+ bno++;
+ }
+
+out:
+ trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
+}
+
+/*
+ * Figure out the longest run of blocks that we can dispose of with a single
+ * call. Cross-linked blocks should have their reverse mappings removed, but
+ * single-owner extents can be freed. AGFL blocks can only be put back one at
+ * a time.
+ */
+STATIC int
+xreap_agextent_select(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_agblock_t agbno_next,
+ bool *crosslinked,
+ xfs_extlen_t *aglenp)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_btree_cur *cur;
+ xfs_agblock_t bno = agbno + 1;
+ xfs_extlen_t len = 1;
+ int error;
+
+ /*
+ * Determine if there are any other rmap records covering the first
+ * block of this extent. If so, the block is crosslinked.
+ */
+ cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.pag);
+ error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
+ crosslinked);
+ if (error)
+ goto out_cur;
+
+ /* AGFL blocks can only be dealt with one at a time. */
+ if (rs->resv == XFS_AG_RESV_AGFL)
+ goto out_found;
+
+ /*
+ * Figure out how many of the subsequent blocks have the same crosslink
+ * status.
+ */
+ while (bno < agbno_next) {
+ bool also_crosslinked;
+
+ error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
+ &also_crosslinked);
+ if (error)
+ goto out_cur;
+
+ if (*crosslinked != also_crosslinked)
+ break;
+
+ len++;
+ bno++;
+ }
+
+out_found:
+ *aglenp = len;
+ trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
+out_cur:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/*
+ * Dispose of as much of the beginning of this AG extent as possible. The
+ * number of blocks disposed of will be returned in @aglenp.
+ */
+STATIC int
+xreap_agextent_iter(
+ struct xreap_state *rs,
+ xfs_agblock_t agbno,
+ xfs_extlen_t *aglenp,
+ bool crosslinked)
+{
+ struct xfs_scrub *sc = rs->sc;
+ xfs_fsblock_t fsbno;
+ int error = 0;
+
+ fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
+
+ /*
+ * If there are other rmappings, this block is cross linked and must
+ * not be freed. Remove the reverse mapping and move on. Otherwise,
+ * we were the only owner of the block, so free the extent, which will
+ * also remove the rmap.
+ *
+ * XXX: XFS doesn't support detecting the case where a single block
+ * metadata structure is crosslinked with a multi-block structure
+ * because the buffer cache doesn't detect aliasing problems, so we
+ * can't fix 100% of crosslinking problems (yet). The verifiers will
+ * blow on writeout, the filesystem will shut down, and the admin gets
+ * to run xfs_repair.
+ */
+ if (crosslinked) {
+ trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
+
+ rs->force_roll = true;
+ return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
+ *aglenp, rs->oinfo);
+ }
+
+ trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
+
+ /*
+ * Invalidate as many buffers as we can, starting at agbno. If this
+ * function sets *aglenp to zero, the transaction is full of logged
+ * buffer invalidations, so we need to return early so that we can
+ * roll and retry.
+ */
+ xreap_agextent_binval(rs, agbno, aglenp);
+ if (*aglenp == 0) {
+ ASSERT(xreap_want_roll(rs));
+ return 0;
+ }
+
+ /* Put blocks back on the AGFL one at a time. */
+ if (rs->resv == XFS_AG_RESV_AGFL) {
+ ASSERT(*aglenp == 1);
+ error = xreap_put_freelist(sc, agbno);
+ if (error)
+ return error;
+
+ rs->force_roll = true;
+ return 0;
+ }
+
+ /*
+ * Use deferred frees to get rid of the old btree blocks to try to
+ * minimize the window in which we could crash and lose the old blocks.
+ */
+ error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
+ rs->resv, true);
+ if (error)
+ return error;
+
+ rs->deferred++;
+ return 0;
+}
+
+/*
+ * Break an AG metadata extent into sub-extents by fate (crosslinked, not
+ * crosslinked), and dispose of each sub-extent separately.
+ */
+STATIC int
+xreap_agmeta_extent(
+ uint64_t fsbno,
+ uint64_t len,
+ void *priv)
+{
+ struct xreap_state *rs = priv;
+ struct xfs_scrub *sc = rs->sc;
+ xfs_agblock_t agbno = fsbno;
+ xfs_agblock_t agbno_next = agbno + len;
+ int error = 0;
+
+ ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
+ ASSERT(sc->ip == NULL);
+
+ while (agbno < agbno_next) {
+ xfs_extlen_t aglen;
+ bool crosslinked;
+
+ error = xreap_agextent_select(rs, agbno, agbno_next,
+ &crosslinked, &aglen);
+ if (error)
+ return error;
+
+ error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
+ if (error)
+ return error;
+
+ if (xreap_want_defer_finish(rs)) {
+ error = xrep_defer_finish(sc);
+ if (error)
+ return error;
+ xreap_defer_finish_reset(rs);
+ } else if (xreap_want_roll(rs)) {
+ error = xrep_roll_ag_trans(sc);
+ if (error)
+ return error;
+ xreap_reset(rs);
+ }
+
+ agbno += aglen;
+ }
+
+ return 0;
+}
+
+/* Dispose of every block of every AG metadata extent in the bitmap. */
+int
+xrep_reap_agblocks(
+ struct xfs_scrub *sc,
+ struct xagb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
+{
+ struct xreap_state rs = {
+ .sc = sc,
+ .oinfo = oinfo,
+ .resv = type,
+ };
+ int error;
+
+ ASSERT(xfs_has_rmapbt(sc->mp));
+ ASSERT(sc->ip == NULL);
+
+ error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
+ if (error)
+ return error;
+
+ if (xreap_dirty(&rs))
+ return xrep_defer_finish(sc);
+
+ return 0;
+}
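
The asserts above capture the new reaper's preconditions; a hedged restatement as a predicate (xrep_example_can_reap() is hypothetical):

/* Reaping per-AG metadata needs the rmapbt and a non-inode repair. */
static inline bool
xrep_example_can_reap(
	struct xfs_scrub	*sc)
{
	/* Without rmap records we cannot tell who else owns a block. */
	if (!xfs_has_rmapbt(sc->mp))
		return false;

	/* This path serves per-AG structures only, never file repairs. */
	if (sc->ip != NULL)
		return false;

	return true;
}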
diff --git a/fs/xfs/scrub/reap.h b/fs/xfs/scrub/reap.h
new file mode 100644
index 000000000000..fe24626af164
--- /dev/null
+++ b/fs/xfs/scrub/reap.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#ifndef __XFS_SCRUB_REAP_H__
+#define __XFS_SCRUB_REAP_H__
+
+int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
+
+#endif /* __XFS_SCRUB_REAP_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index ac6d8803e660..83a1b1437a4f 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -26,6 +26,7 @@
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
+#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -166,6 +167,56 @@ xrep_roll_ag_trans(
return 0;
}
+/* Finish all deferred work attached to the repair transaction. */
+int
+xrep_defer_finish(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ /*
+ * Keep the AG header buffers locked while we complete deferred work
+ * items. Ensure that both AG buffers are dirty and held when we roll
+ * the transaction so that they move forward in the log without losing
+ * the bli (and hence the bli type) when the transaction commits.
+ *
+ * Normal code would never hold clean buffers across a roll, but repair
+ * needs both buffers to maintain a total lock on the AG.
+ */
+ if (sc->sa.agi_bp) {
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+ }
+
+ if (sc->sa.agf_bp) {
+ xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
+ xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+ }
+
+ /*
+ * Finish all deferred work items. We still hold the AG header buffers
+ * locked regardless of whether or not that succeeds. On failure, the
+ * buffers will be released during teardown on our way out of the
+ * kernel. If successful, join the buffers to the new transaction
+ * and move on.
+ */
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ return error;
+
+ /*
+ * Release the hold that we set above because defer_finish won't do
+ * that for us. The defer roll code redirties held buffers after each
+ * roll, so the AG header buffers should be ready for logging.
+ */
+ if (sc->sa.agi_bp)
+ xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+ if (sc->sa.agf_bp)
+ xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+
+ return 0;
+}
+
/*
* Does the given AG have enough space to rebuild a btree? Neither AG
* reservation can be critical, and we must have enough space (factoring
@@ -297,89 +348,6 @@ xrep_calc_ag_resblks(
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
-/* Allocate a block in an AG. */
-int
-xrep_alloc_ag_block(
- struct xfs_scrub *sc,
- const struct xfs_owner_info *oinfo,
- xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv)
-{
- struct xfs_alloc_arg args = {0};
- xfs_agblock_t bno;
- int error;
-
- switch (resv) {
- case XFS_AG_RESV_AGFL:
- case XFS_AG_RESV_RMAPBT:
- error = xfs_alloc_get_freelist(sc->sa.pag, sc->tp,
- sc->sa.agf_bp, &bno, 1);
- if (error)
- return error;
- if (bno == NULLAGBLOCK)
- return -ENOSPC;
- xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false);
- *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno);
- if (resv == XFS_AG_RESV_RMAPBT)
- xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno);
- return 0;
- default:
- break;
- }
-
- args.tp = sc->tp;
- args.mp = sc->mp;
- args.pag = sc->sa.pag;
- args.oinfo = *oinfo;
- args.minlen = 1;
- args.maxlen = 1;
- args.prod = 1;
- args.resv = resv;
-
- error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno);
- if (error)
- return error;
- if (args.fsbno == NULLFSBLOCK)
- return -ENOSPC;
- ASSERT(args.len == 1);
- *fsbno = args.fsbno;
-
- return 0;
-}
-
-/* Initialize a new AG btree root block with zero entries. */
-int
-xrep_init_btblock(
- struct xfs_scrub *sc,
- xfs_fsblock_t fsb,
- struct xfs_buf **bpp,
- xfs_btnum_t btnum,
- const struct xfs_buf_ops *ops)
-{
- struct xfs_trans *tp = sc->tp;
- struct xfs_mount *mp = sc->mp;
- struct xfs_buf *bp;
- int error;
-
- trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
- XFS_FSB_TO_AGBNO(mp, fsb), btnum);
-
- ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno);
- error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
- &bp);
- if (error)
- return error;
- xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno);
- xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
- xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
- bp->b_ops = ops;
- *bpp = bp;
-
- return 0;
-}
-
/*
* Reconstructing per-AG Btrees
*
@@ -404,91 +372,8 @@ xrep_init_btblock(
* sublist. As with the other btrees we subtract sublist from bitmap, and the
* result (since the rmapbt lives in the free space) are the blocks from the
* old rmapbt.
- *
- * Disposal of Blocks from Old per-AG Btrees
- *
- * Now that we've constructed a new btree to replace the damaged one, we want
- * to dispose of the blocks that (we think) the old btree was using.
- * Previously, we used the rmapbt to collect the extents (bitmap) with the
- * rmap owner corresponding to the tree we rebuilt, collected extents for any
- * blocks with the same rmap owner that are owned by another data structure
- * (sublist), and subtracted sublist from bitmap. In theory the extents
- * remaining in bitmap are the old btree's blocks.
- *
- * Unfortunately, it's possible that the btree was crosslinked with other
- * blocks on disk. The rmap data can tell us if there are multiple owners, so
- * if the rmapbt says there is an owner of this block other than @oinfo, then
- * the block is crosslinked. Remove the reverse mapping and continue.
- *
- * If there is one rmap record, we can free the block, which removes the
- * reverse mapping but doesn't add the block to the free space. Our repair
- * strategy is to hope the other metadata objects crosslinked on this block
- * will be rebuilt (atop different blocks), thereby removing all the cross
- * links.
- *
- * If there are no rmap records at all, we also free the block. If the btree
- * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
- * supposed to be a rmap record and everything is ok. For other btrees there
- * had to have been an rmap entry for the block to have ended up on @bitmap,
- * so if it's gone now there's something wrong and the fs will shut down.
- *
- * Note: If there are multiple rmap records with only the same rmap owner as
- * the btree we're trying to rebuild and the block is indeed owned by another
- * data structure with the same rmap owner, then the block will be in sublist
- * and therefore doesn't need disposal. If there are multiple rmap records
- * with only the same rmap owner but the block is not owned by something with
- * the same rmap owner, the block will be freed.
- *
- * The caller is responsible for locking the AG headers for the entire rebuild
- * operation so that nothing else can sneak in and change the AG state while
- * we're not looking. We also assume that the caller already invalidated any
- * buffers associated with @bitmap.
*/
-static int
-xrep_invalidate_block(
- uint64_t fsbno,
- void *priv)
-{
- struct xfs_scrub *sc = priv;
- struct xfs_buf *bp;
- int error;
-
- /* Skip AG headers and post-EOFS blocks */
- if (!xfs_verify_fsbno(sc->mp, fsbno))
- return 0;
-
- error = xfs_buf_incore(sc->mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(sc->mp, fsbno),
- XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp);
- if (error)
- return 0;
-
- xfs_trans_bjoin(sc->tp, bp);
- xfs_trans_binval(sc->tp, bp);
- return 0;
-}
-
-/*
- * Invalidate buffers for per-AG btree blocks we're dumping. This function
- * is not intended for use with file data repairs; we have bunmapi for that.
- */
-int
-xrep_invalidate_blocks(
- struct xfs_scrub *sc,
- struct xbitmap *bitmap)
-{
- /*
- * For each block in each extent, see if there's an incore buffer for
- * exactly that block; if so, invalidate it. The buffer cache only
- * lets us look for one buffer at a time, so we have to look one block
- * at a time. Avoid invalidating AG headers and post-EOFS blocks
- * because we never own those; and if we can't TRYLOCK the buffer we
- * assume it's owned by someone else.
- */
- return xbitmap_walk_bits(bitmap, xrep_invalidate_block, sc);
-}
-
/* Ensure the freelist is the correct size. */
int
xrep_fix_freelist(
@@ -507,155 +392,6 @@ xrep_fix_freelist(
can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
}
-/* Information about reaping extents after a repair. */
-struct xrep_reap_state {
- struct xfs_scrub *sc;
-
- /* Reverse mapping owner and metadata reservation type. */
- const struct xfs_owner_info *oinfo;
- enum xfs_ag_resv_type resv;
-};
-
-/*
- * Put a block back on the AGFL.
- */
-STATIC int
-xrep_put_freelist(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno)
-{
- struct xfs_buf *agfl_bp;
- int error;
-
- /* Make sure there's space on the freelist. */
- error = xrep_fix_freelist(sc, true);
- if (error)
- return error;
-
- /*
- * Since we're "freeing" a lost block onto the AGFL, we have to
- * create an rmap for the block prior to merging it or else other
- * parts will break.
- */
- error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
- &XFS_RMAP_OINFO_AG);
- if (error)
- return error;
-
- /* Put the block on the AGFL. */
- error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
- if (error)
- return error;
-
- error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
- agfl_bp, agbno, 0);
- if (error)
- return error;
- xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
- XFS_EXTENT_BUSY_SKIP_DISCARD);
-
- return 0;
-}
-
-/* Dispose of a single block. */
-STATIC int
-xrep_reap_block(
- uint64_t fsbno,
- void *priv)
-{
- struct xrep_reap_state *rs = priv;
- struct xfs_scrub *sc = rs->sc;
- struct xfs_btree_cur *cur;
- struct xfs_buf *agf_bp = NULL;
- xfs_agblock_t agbno;
- bool has_other_rmap;
- int error;
-
- ASSERT(sc->ip != NULL ||
- XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
- trace_xrep_dispose_btree_extent(sc->mp,
- XFS_FSB_TO_AGNO(sc->mp, fsbno),
- XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
-
- agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
- ASSERT(XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
-
- /*
- * If we are repairing per-inode metadata, we need to read in the AGF
- * buffer. Otherwise, we're repairing a per-AG structure, so reuse
- * the AGF buffer that the setup functions already grabbed.
- */
- if (sc->ip) {
- error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp);
- if (error)
- return error;
- } else {
- agf_bp = sc->sa.agf_bp;
- }
- cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, sc->sa.pag);
-
- /* Can we find any other rmappings? */
- error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
- &has_other_rmap);
- xfs_btree_del_cursor(cur, error);
- if (error)
- goto out_free;
-
- /*
- * If there are other rmappings, this block is cross linked and must
- * not be freed. Remove the reverse mapping and move on. Otherwise,
- * we were the only owner of the block, so free the extent, which will
- * also remove the rmap.
- *
- * XXX: XFS doesn't support detecting the case where a single block
- * metadata structure is crosslinked with a multi-block structure
- * because the buffer cache doesn't detect aliasing problems, so we
- * can't fix 100% of crosslinking problems (yet). The verifiers will
- * blow on writeout, the filesystem will shut down, and the admin gets
- * to run xfs_repair.
- */
- if (has_other_rmap)
- error = xfs_rmap_free(sc->tp, agf_bp, sc->sa.pag, agbno,
- 1, rs->oinfo);
- else if (rs->resv == XFS_AG_RESV_AGFL)
- error = xrep_put_freelist(sc, agbno);
- else
- error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, rs->oinfo,
- rs->resv);
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- if (error)
- return error;
-
- if (sc->ip)
- return xfs_trans_roll_inode(&sc->tp, sc->ip);
- return xrep_roll_ag_trans(sc);
-
-out_free:
- if (agf_bp != sc->sa.agf_bp)
- xfs_trans_brelse(sc->tp, agf_bp);
- return error;
-}
-
-/* Dispose of every block of every extent in the bitmap. */
-int
-xrep_reap_extents(
- struct xfs_scrub *sc,
- struct xbitmap *bitmap,
- const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type)
-{
- struct xrep_reap_state rs = {
- .sc = sc,
- .oinfo = oinfo,
- .resv = type,
- };
-
- ASSERT(xfs_has_rmapbt(sc->mp));
-
- return xbitmap_walk_bits(bitmap, xrep_reap_block, &rs);
-}
-
/*
* Finding per-AG Btree Roots for AGF/AGI Reconstruction
*
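
A hedged sketch of the intended calling pattern for the new xrep_defer_finish() (xrep_example_free() and its arguments are hypothetical; __xfs_free_extent_later() is the deferred-free interface that reap.c uses with the same argument order):

/* Queue a deferred free, then settle all deferred items at a safe point. */
STATIC int
xrep_example_free(
	struct xfs_scrub		*sc,
	xfs_fsblock_t			fsbno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo)
{
	int				error;

	error = __xfs_free_extent_later(sc->tp, fsbno, len, oinfo,
			XFS_AG_RESV_NONE, true);
	if (error)
		return error;

	/* The AG header buffers stay locked across the rolls inside. */
	return xrep_defer_finish(sc);
}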
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index dce791c679ee..45478040a19d 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -15,28 +15,28 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
#ifdef CONFIG_XFS_ONLINE_REPAIR
+/*
+ * This is the maximum number of deferred extent freeing item extents (EFIs)
+ * that we'll attach to a transaction without rolling the transaction to avoid
+ * overrunning a tr_itruncate reservation.
+ */
+#define XREP_MAX_ITRUNCATE_EFIS (128)
+
+
/* Repair helpers */
int xrep_attempt(struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
+int xrep_defer_finish(struct xfs_scrub *sc);
bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
enum xfs_ag_resv_type type);
xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
-int xrep_alloc_ag_block(struct xfs_scrub *sc,
- const struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
- enum xfs_ag_resv_type resv);
-int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
- struct xfs_buf **bpp, xfs_btnum_t btnum,
- const struct xfs_buf_ops *ops);
struct xbitmap;
struct xagb_bitmap;
int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
-int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xbitmap *btlist);
-int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
- const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
struct xrep_find_ag_btree {
/* in: rmap owner of the btree we're looking for */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b3894daeb86a..73cf1002bd94 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -729,9 +729,8 @@ TRACE_EVENT(xchk_refcount_incorrect,
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
DECLARE_EVENT_CLASS(xrep_extent_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, agbno, len),
+ TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len),
+ TP_ARGS(pag, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -739,8 +738,8 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
__entry->agbno = agbno;
__entry->len = len;
),
@@ -752,12 +751,45 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
);
#define DEFINE_REPAIR_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_extent_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len), \
- TP_ARGS(mp, agno, agbno, len))
-DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent);
+ TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len), \
+ TP_ARGS(pag, agbno, len))
+DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
+DECLARE_EVENT_CLASS(xrep_reap_find_class,
+ TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len,
+ bool crosslinked),
+ TP_ARGS(pag, agbno, len, crosslinked),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(bool, crosslinked)
+ ),
+ TP_fast_assign(
+ __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->agno = pag->pag_agno;
+ __entry->agbno = agbno;
+ __entry->len = len;
+ __entry->crosslinked = crosslinked;
+ ),
+ TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x crosslinked %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->len,
+ __entry->crosslinked ? 1 : 0)
+);
+#define DEFINE_REPAIR_REAP_FIND_EVENT(name) \
+DEFINE_EVENT(xrep_reap_find_class, name, \
+ TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, \
+ bool crosslinked), \
+ TP_ARGS(pag, agbno, len, crosslinked))
+DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select);
+
DECLARE_EVENT_CLASS(xrep_rmap_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len,
@@ -827,28 +859,6 @@ TRACE_EVENT(xrep_refcount_extent_fn,
__entry->refcount)
)
-TRACE_EVENT(xrep_init_btblock,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_btnum_t btnum),
- TP_ARGS(mp, agno, agbno, btnum),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(uint32_t, btnum)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->btnum = btnum;
- ),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x btree %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS))
-)
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
uint32_t magic, uint16_t level),
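
With the extent event class keyed off the perag, defining further reap events is a one-line macro. A hedged sketch (xreap_example_event is hypothetical):

/* Hypothetical extra event reusing the perag-based extent class. */
DEFINE_REPAIR_EXTENT_EVENT(xreap_example_event);

which a caller would emit as trace_xreap_example_event(sc->sa.pag, agbno, len);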
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 15d1e5a7c2d3..fa392c43ba16 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -481,7 +481,8 @@ _xfs_buf_obj_cmp(
* reallocating a busy extent. Skip this buffer and
* continue searching for an exact match.
*/
- ASSERT(bp->b_flags & XBF_STALE);
+ if (!(map->bm_flags & XBM_LIVESCAN))
+ ASSERT(bp->b_flags & XBF_STALE);
return 1;
}
return 0;
@@ -559,6 +560,10 @@ xfs_buf_find_lock(
* intact here.
*/
if (bp->b_flags & XBF_STALE) {
+ if (flags & XBF_LIVESCAN) {
+ xfs_buf_unlock(bp);
+ return -ENOENT;
+ }
ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
bp->b_ops = NULL;
@@ -682,6 +687,8 @@ xfs_buf_get_map(
int error;
int i;
+ if (flags & XBF_LIVESCAN)
+ cmap.bm_flags |= XBM_LIVESCAN;
for (i = 0; i < nmaps; i++)
cmap.bm_len += map[i].bm_len;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 549c60942208..df8f47953bb4 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -44,6 +44,11 @@ struct xfs_buf;
#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */
/* flags used only as arguments to access routines */
+/*
+ * Online fsck is scanning the buffer cache for live buffers. Do not warn
+ * about length mismatches during lookups and do not return stale buffers.
+ */
+#define XBF_LIVESCAN (1u << 28)
#define XBF_INCORE (1u << 29)/* lookup only, return if found in cache */
#define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */
#define XBF_UNMAPPED (1u << 31)/* do not map the buffer */
@@ -67,6 +72,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
/* The following interface flags should never be set */ \
+ { XBF_LIVESCAN, "LIVESCAN" }, \
{ XBF_INCORE, "INCORE" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, \
{ XBF_UNMAPPED, "UNMAPPED" }
@@ -114,8 +120,15 @@ typedef struct xfs_buftarg {
struct xfs_buf_map {
xfs_daddr_t bm_bn; /* block number for I/O */
int bm_len; /* size of I/O */
+ unsigned int bm_flags;
};
+/*
+ * Online fsck is scanning the buffer cache for live buffers. Do not warn
+ * about length mismatches during lookups and do not return stale buffers.
+ */
+#define XBM_LIVESCAN (1U << 0)
+
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };