diff options
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/agheader_repair.c | 75 | ||||
-rw-r--r-- | fs/xfs/scrub/bitmap.c | 78 | ||||
-rw-r--r-- | fs/xfs/scrub/bitmap.h | 10 | ||||
-rw-r--r-- | fs/xfs/scrub/reap.c | 498 | ||||
-rw-r--r-- | fs/xfs/scrub/reap.h | 12 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 366 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 18 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 72 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 13 |
11 files changed, 673 insertions, 479 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 16e4eb431230..0a5cebb9802b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -173,6 +173,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ + reap.o \ repair.o \ ) endif diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index bbaa65422c4f..9e99486b5f20 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -26,6 +26,7 @@ #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/bitmap.h" +#include "scrub/reap.h" /* Superblock */ @@ -444,13 +445,13 @@ out_revert: struct xrep_agfl { /* Bitmap of alleged AGFL blocks that we're not going to add. */ - struct xbitmap crossed; + struct xagb_bitmap crossed; /* Bitmap of other OWN_AG metadata blocks. */ - struct xbitmap agmetablocks; + struct xagb_bitmap agmetablocks; /* Bitmap of free space. */ - struct xbitmap *freesp; + struct xagb_bitmap *freesp; /* rmapbt cursor for finding crosslinked blocks */ struct xfs_btree_cur *rmap_cur; @@ -466,7 +467,6 @@ xrep_agfl_walk_rmap( void *priv) { struct xrep_agfl *ra = priv; - xfs_fsblock_t fsb; int error = 0; if (xchk_should_terminate(ra->sc, &error)) @@ -474,14 +474,13 @@ xrep_agfl_walk_rmap( /* Record all the OWN_AG blocks. */ if (rec->rm_owner == XFS_RMAP_OWN_AG) { - fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, - rec->rm_startblock); - error = xbitmap_set(ra->freesp, fsb, rec->rm_blockcount); + error = xagb_bitmap_set(ra->freesp, rec->rm_startblock, + rec->rm_blockcount); if (error) return error; } - return xbitmap_set_btcur_path(&ra->agmetablocks, cur); + return xagb_bitmap_set_btcur_path(&ra->agmetablocks, cur); } /* Strike out the blocks that are cross-linked according to the rmapbt. */ @@ -492,12 +491,10 @@ xrep_agfl_check_extent( void *priv) { struct xrep_agfl *ra = priv; - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start); + xfs_agblock_t agbno = start; xfs_agblock_t last_agbno = agbno + len - 1; int error; - ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno); - while (agbno <= last_agbno) { bool other_owners; @@ -507,7 +504,7 @@ xrep_agfl_check_extent( return error; if (other_owners) { - error = xbitmap_set(&ra->crossed, agbno, 1); + error = xagb_bitmap_set(&ra->crossed, agbno, 1); if (error) return error; } @@ -533,7 +530,7 @@ STATIC int xrep_agfl_collect_blocks( struct xfs_scrub *sc, struct xfs_buf *agf_bp, - struct xbitmap *agfl_extents, + struct xagb_bitmap *agfl_extents, xfs_agblock_t *flcount) { struct xrep_agfl ra; @@ -543,8 +540,8 @@ xrep_agfl_collect_blocks( ra.sc = sc; ra.freesp = agfl_extents; - xbitmap_init(&ra.agmetablocks); - xbitmap_init(&ra.crossed); + xagb_bitmap_init(&ra.agmetablocks); + xagb_bitmap_init(&ra.crossed); /* Find all space used by the free space btrees & rmapbt. */ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); @@ -556,7 +553,7 @@ xrep_agfl_collect_blocks( /* Find all blocks currently being used by the bnobt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_BNO); - error = xbitmap_set_btblocks(&ra.agmetablocks, cur); + error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur); xfs_btree_del_cursor(cur, error); if (error) goto out_bmp; @@ -564,7 +561,7 @@ xrep_agfl_collect_blocks( /* Find all blocks currently being used by the cntbt. */ cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag, XFS_BTNUM_CNT); - error = xbitmap_set_btblocks(&ra.agmetablocks, cur); + error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur); xfs_btree_del_cursor(cur, error); if (error) goto out_bmp; @@ -573,17 +570,17 @@ xrep_agfl_collect_blocks( * Drop the freesp meta blocks that are in use by btrees. * The remaining blocks /should/ be AGFL blocks. */ - error = xbitmap_disunion(agfl_extents, &ra.agmetablocks); + error = xagb_bitmap_disunion(agfl_extents, &ra.agmetablocks); if (error) goto out_bmp; /* Strike out the blocks that are cross-linked. */ ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag); - error = xbitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra); + error = xagb_bitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra); xfs_btree_del_cursor(ra.rmap_cur, error); if (error) goto out_bmp; - error = xbitmap_disunion(agfl_extents, &ra.crossed); + error = xagb_bitmap_disunion(agfl_extents, &ra.crossed); if (error) goto out_bmp; @@ -591,12 +588,12 @@ xrep_agfl_collect_blocks( * Calculate the new AGFL size. If we found more blocks than fit in * the AGFL we'll free them later. */ - *flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents), + *flcount = min_t(uint64_t, xagb_bitmap_hweight(agfl_extents), xfs_agfl_size(mp)); out_bmp: - xbitmap_destroy(&ra.crossed); - xbitmap_destroy(&ra.agmetablocks); + xagb_bitmap_destroy(&ra.crossed); + xagb_bitmap_destroy(&ra.agmetablocks); return error; } @@ -626,7 +623,7 @@ xrep_agfl_update_agf( } struct xrep_agfl_fill { - struct xbitmap used_extents; + struct xagb_bitmap used_extents; struct xfs_scrub *sc; __be32 *agfl_bno; xfs_agblock_t flcount; @@ -642,17 +639,15 @@ xrep_agfl_fill( { struct xrep_agfl_fill *af = priv; struct xfs_scrub *sc = af->sc; - xfs_fsblock_t fsbno = start; + xfs_agblock_t agbno = start; int error; - while (fsbno < start + len && af->fl_off < af->flcount) - af->agfl_bno[af->fl_off++] = - cpu_to_be32(XFS_FSB_TO_AGBNO(sc->mp, fsbno++)); + trace_xrep_agfl_insert(sc->sa.pag, agbno, len); - trace_xrep_agfl_insert(sc->mp, sc->sa.pag->pag_agno, - XFS_FSB_TO_AGBNO(sc->mp, start), len); + while (agbno < start + len && af->fl_off < af->flcount) + af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++); - error = xbitmap_set(&af->used_extents, start, fsbno - 1); + error = xagb_bitmap_set(&af->used_extents, start, agbno - 1); if (error) return error; @@ -667,7 +662,7 @@ STATIC int xrep_agfl_init_header( struct xfs_scrub *sc, struct xfs_buf *agfl_bp, - struct xbitmap *agfl_extents, + struct xagb_bitmap *agfl_extents, xfs_agblock_t flcount) { struct xrep_agfl_fill af = { @@ -695,17 +690,17 @@ xrep_agfl_init_header( * blocks than fit in the AGFL, they will be freed in a subsequent * step. */ - xbitmap_init(&af.used_extents); + xagb_bitmap_init(&af.used_extents); af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), - xbitmap_walk(agfl_extents, xrep_agfl_fill, &af); - error = xbitmap_disunion(agfl_extents, &af.used_extents); + xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); + error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) return error; /* Write new AGFL to disk. */ xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF); xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1); - xbitmap_destroy(&af.used_extents); + xagb_bitmap_destroy(&af.used_extents); return 0; } @@ -714,7 +709,7 @@ int xrep_agfl( struct xfs_scrub *sc) { - struct xbitmap agfl_extents; + struct xagb_bitmap agfl_extents; struct xfs_mount *mp = sc->mp; struct xfs_buf *agf_bp; struct xfs_buf *agfl_bp; @@ -725,7 +720,7 @@ xrep_agfl( if (!xfs_has_rmapbt(mp)) return -EOPNOTSUPP; - xbitmap_init(&agfl_extents); + xagb_bitmap_init(&agfl_extents); /* * Read the AGF so that we can query the rmapbt. We hope that there's @@ -774,10 +769,10 @@ xrep_agfl( goto err; /* Dump any AGFL overflow. */ - error = xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG, + error = xrep_reap_agblocks(sc, &agfl_extents, &XFS_RMAP_OINFO_AG, XFS_AG_RESV_AGFL); err: - xbitmap_destroy(&agfl_extents); + xagb_bitmap_destroy(&agfl_extents); return error; } diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 0c959be396ea..e0c89a9a0ca0 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -301,21 +301,15 @@ xagb_bitmap_set_btblocks( * blocks going from the leaf towards the root. */ int -xbitmap_set_btcur_path( - struct xbitmap *bitmap, +xagb_bitmap_set_btcur_path( + struct xagb_bitmap *bitmap, struct xfs_btree_cur *cur) { - struct xfs_buf *bp; - xfs_fsblock_t fsb; int i; int error; for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) { - xfs_btree_get_block(cur, i, &bp); - if (!bp) - continue; - fsb = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); - error = xbitmap_set(bitmap, fsb, 1); + error = xagb_bitmap_visit_btblock(cur, i, bitmap); if (error) return error; } @@ -323,35 +317,6 @@ xbitmap_set_btcur_path( return 0; } -/* Collect a btree's block in the bitmap. */ -STATIC int -xbitmap_collect_btblock( - struct xfs_btree_cur *cur, - int level, - void *priv) -{ - struct xbitmap *bitmap = priv; - struct xfs_buf *bp; - xfs_fsblock_t fsbno; - - xfs_btree_get_block(cur, level, &bp); - if (!bp) - return 0; - - fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); - return xbitmap_set(bitmap, fsbno, 1); -} - -/* Walk the btree and mark the bitmap wherever a btree block is found. */ -int -xbitmap_set_btblocks( - struct xbitmap *bitmap, - struct xfs_btree_cur *cur) -{ - return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock, - XFS_BTREE_VISIT_ALL, bitmap); -} - /* How many bits are set in this bitmap? */ uint64_t xbitmap_hweight( @@ -385,43 +350,6 @@ xbitmap_walk( return error; } -struct xbitmap_walk_bits { - xbitmap_walk_bits_fn fn; - void *priv; -}; - -/* Walk all the bits in a run. */ -static int -xbitmap_walk_bits_in_run( - uint64_t start, - uint64_t len, - void *priv) -{ - struct xbitmap_walk_bits *wb = priv; - uint64_t i; - int error = 0; - - for (i = start; i < start + len; i++) { - error = wb->fn(i, wb->priv); - if (error) - break; - } - - return error; -} - -/* Call a function for every set bit in this bitmap. */ -int -xbitmap_walk_bits( - struct xbitmap *bitmap, - xbitmap_walk_bits_fn fn, - void *priv) -{ - struct xbitmap_walk_bits wb = {.fn = fn, .priv = priv}; - - return xbitmap_walk(bitmap, xbitmap_walk_bits_in_run, &wb); -} - /* Does this bitmap have no bits set at all? */ bool xbitmap_empty( diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h index 84981724ecaf..4fe58bad6734 100644 --- a/fs/xfs/scrub/bitmap.h +++ b/fs/xfs/scrub/bitmap.h @@ -16,10 +16,6 @@ void xbitmap_destroy(struct xbitmap *bitmap); int xbitmap_clear(struct xbitmap *bitmap, uint64_t start, uint64_t len); int xbitmap_set(struct xbitmap *bitmap, uint64_t start, uint64_t len); int xbitmap_disunion(struct xbitmap *bitmap, struct xbitmap *sub); -int xbitmap_set_btcur_path(struct xbitmap *bitmap, - struct xfs_btree_cur *cur); -int xbitmap_set_btblocks(struct xbitmap *bitmap, - struct xfs_btree_cur *cur); uint64_t xbitmap_hweight(struct xbitmap *bitmap); /* @@ -33,10 +29,6 @@ typedef int (*xbitmap_walk_fn)(uint64_t start, uint64_t len, void *priv); int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn, void *priv); -typedef int (*xbitmap_walk_bits_fn)(uint64_t bit, void *priv); -int xbitmap_walk_bits(struct xbitmap *bitmap, xbitmap_walk_bits_fn fn, - void *priv); - bool xbitmap_empty(struct xbitmap *bitmap); bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len); @@ -110,5 +102,7 @@ static inline int xagb_bitmap_walk(struct xagb_bitmap *bitmap, int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap, struct xfs_btree_cur *cur); +int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap, + struct xfs_btree_cur *cur); #endif /* __XFS_SCRUB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c new file mode 100644 index 000000000000..86a62420e02c --- /dev/null +++ b/fs/xfs/scrub/reap.c @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <[email protected]> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount_btree.h" +#include "xfs_extent_busy.h" +#include "xfs_ag.h" +#include "xfs_ag_resv.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_bmap.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_attr_remote.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/bitmap.h" +#include "scrub/reap.h" + +/* + * Disposal of Blocks from Old Metadata + * + * Now that we've constructed a new btree to replace the damaged one, we want + * to dispose of the blocks that (we think) the old btree was using. + * Previously, we used the rmapbt to collect the extents (bitmap) with the + * rmap owner corresponding to the tree we rebuilt, collected extents for any + * blocks with the same rmap owner that are owned by another data structure + * (sublist), and subtracted sublist from bitmap. In theory the extents + * remaining in bitmap are the old btree's blocks. + * + * Unfortunately, it's possible that the btree was crosslinked with other + * blocks on disk. The rmap data can tell us if there are multiple owners, so + * if the rmapbt says there is an owner of this block other than @oinfo, then + * the block is crosslinked. Remove the reverse mapping and continue. + * + * If there is one rmap record, we can free the block, which removes the + * reverse mapping but doesn't add the block to the free space. Our repair + * strategy is to hope the other metadata objects crosslinked on this block + * will be rebuilt (atop different blocks), thereby removing all the cross + * links. + * + * If there are no rmap records at all, we also free the block. If the btree + * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't + * supposed to be a rmap record and everything is ok. For other btrees there + * had to have been an rmap entry for the block to have ended up on @bitmap, + * so if it's gone now there's something wrong and the fs will shut down. + * + * Note: If there are multiple rmap records with only the same rmap owner as + * the btree we're trying to rebuild and the block is indeed owned by another + * data structure with the same rmap owner, then the block will be in sublist + * and therefore doesn't need disposal. If there are multiple rmap records + * with only the same rmap owner but the block is not owned by something with + * the same rmap owner, the block will be freed. + * + * The caller is responsible for locking the AG headers for the entire rebuild + * operation so that nothing else can sneak in and change the AG state while + * we're not looking. We must also invalidate any buffers associated with + * @bitmap. + */ + +/* Information about reaping extents after a repair. */ +struct xreap_state { + struct xfs_scrub *sc; + + /* Reverse mapping owner and metadata reservation type. */ + const struct xfs_owner_info *oinfo; + enum xfs_ag_resv_type resv; + + /* If true, roll the transaction before reaping the next extent. */ + bool force_roll; + + /* Number of deferred reaps attached to the current transaction. */ + unsigned int deferred; + + /* Number of invalidated buffers logged to the current transaction. */ + unsigned int invalidated; + + /* Number of deferred reaps queued during the whole reap sequence. */ + unsigned long long total_deferred; +}; + +/* Put a block back on the AGFL. */ +STATIC int +xreap_put_freelist( + struct xfs_scrub *sc, + xfs_agblock_t agbno) +{ + struct xfs_buf *agfl_bp; + int error; + + /* Make sure there's space on the freelist. */ + error = xrep_fix_freelist(sc, true); + if (error) + return error; + + /* + * Since we're "freeing" a lost block onto the AGFL, we have to + * create an rmap for the block prior to merging it or else other + * parts will break. + */ + error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1, + &XFS_RMAP_OINFO_AG); + if (error) + return error; + + /* Put the block on the AGFL. */ + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); + if (error) + return error; + + error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, + agfl_bp, agbno, 0); + if (error) + return error; + xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, + XFS_EXTENT_BUSY_SKIP_DISCARD); + + return 0; +} + +/* Are there any uncommitted reap operations? */ +static inline bool xreap_dirty(const struct xreap_state *rs) +{ + if (rs->force_roll) + return true; + if (rs->deferred) + return true; + if (rs->invalidated) + return true; + if (rs->total_deferred) + return true; + return false; +} + +#define XREAP_MAX_BINVAL (2048) + +/* + * Decide if we want to roll the transaction after reaping an extent. We don't + * want to overrun the transaction reservation, so we prohibit more than + * 128 EFIs per transaction. For the same reason, we limit the number + * of buffer invalidations to 2048. + */ +static inline bool xreap_want_roll(const struct xreap_state *rs) +{ + if (rs->force_roll) + return true; + if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS) + return true; + if (rs->invalidated > XREAP_MAX_BINVAL) + return true; + return false; +} + +static inline void xreap_reset(struct xreap_state *rs) +{ + rs->total_deferred += rs->deferred; + rs->deferred = 0; + rs->invalidated = 0; + rs->force_roll = false; +} + +#define XREAP_MAX_DEFER_CHAIN (2048) + +/* + * Decide if we want to finish the deferred ops that are attached to the scrub + * transaction. We don't want to queue huge chains of deferred ops because + * that can consume a lot of log space and kernel memory. Hence we trigger a + * xfs_defer_finish if there are more than 2048 deferred reap operations or the + * caller did some real work. + */ +static inline bool +xreap_want_defer_finish(const struct xreap_state *rs) +{ + if (rs->force_roll) + return true; + if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN) + return true; + return false; +} + +static inline void xreap_defer_finish_reset(struct xreap_state *rs) +{ + rs->total_deferred = 0; + rs->deferred = 0; + rs->invalidated = 0; + rs->force_roll = false; +} + +/* Try to invalidate the incore buffers for an extent that we're freeing. */ +STATIC void +xreap_agextent_binval( + struct xreap_state *rs, + xfs_agblock_t agbno, + xfs_extlen_t *aglenp) +{ + struct xfs_scrub *sc = rs->sc; + struct xfs_perag *pag = sc->sa.pag; + struct xfs_mount *mp = sc->mp; + xfs_agnumber_t agno = sc->sa.pag->pag_agno; + xfs_agblock_t agbno_next = agbno + *aglenp; + xfs_agblock_t bno = agbno; + + /* + * Avoid invalidating AG headers and post-EOFS blocks because we never + * own those. + */ + if (!xfs_verify_agbno(pag, agbno) || + !xfs_verify_agbno(pag, agbno_next - 1)) + return; + + /* + * If there are incore buffers for these blocks, invalidate them. We + * assume that the lack of any other known owners means that the buffer + * can be locked without risk of deadlocking. The buffer cache cannot + * detect aliasing, so employ nested loops to scan for incore buffers + * of any plausible size. + */ + while (bno < agbno_next) { + xfs_agblock_t fsbcount; + xfs_agblock_t max_fsbs; + + /* + * Max buffer size is the max remote xattr buffer size, which + * is one fs block larger than 64k. + */ + max_fsbs = min_t(xfs_agblock_t, agbno_next - bno, + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX)); + + for (fsbcount = 1; fsbcount < max_fsbs; fsbcount++) { + struct xfs_buf *bp = NULL; + xfs_daddr_t daddr; + int error; + + daddr = XFS_AGB_TO_DADDR(mp, agno, bno); + error = xfs_buf_incore(mp->m_ddev_targp, daddr, + XFS_FSB_TO_BB(mp, fsbcount), + XBF_LIVESCAN, &bp); + if (error) + continue; + + xfs_trans_bjoin(sc->tp, bp); + xfs_trans_binval(sc->tp, bp); + rs->invalidated++; + + /* + * Stop invalidating if we've hit the limit; we should + * still have enough reservation left to free however + * far we've gotten. + */ + if (rs->invalidated > XREAP_MAX_BINVAL) { + *aglenp -= agbno_next - bno; + goto out; + } + } + + bno++; + } + +out: + trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp); +} + +/* + * Figure out the longest run of blocks that we can dispose of with a single + * call. Cross-linked blocks should have their reverse mappings removed, but + * single-owner extents can be freed. AGFL blocks can only be put back one at + * a time. + */ +STATIC int +xreap_agextent_select( + struct xreap_state *rs, + xfs_agblock_t agbno, + xfs_agblock_t agbno_next, + bool *crosslinked, + xfs_extlen_t *aglenp) +{ + struct xfs_scrub *sc = rs->sc; + struct xfs_btree_cur *cur; + xfs_agblock_t bno = agbno + 1; + xfs_extlen_t len = 1; + int error; + + /* + * Determine if there are any other rmap records covering the first + * block of this extent. If so, the block is crosslinked. + */ + cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp, + sc->sa.pag); + error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo, + crosslinked); + if (error) + goto out_cur; + + /* AGFL blocks can only be deal with one at a time. */ + if (rs->resv == XFS_AG_RESV_AGFL) + goto out_found; + + /* + * Figure out how many of the subsequent blocks have the same crosslink + * status. + */ + while (bno < agbno_next) { + bool also_crosslinked; + + error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo, + &also_crosslinked); + if (error) + goto out_cur; + + if (*crosslinked != also_crosslinked) + break; + + len++; + bno++; + } + +out_found: + *aglenp = len; + trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked); +out_cur: + xfs_btree_del_cursor(cur, error); + return error; +} + +/* + * Dispose of as much of the beginning of this AG extent as possible. The + * number of blocks disposed of will be returned in @aglenp. + */ +STATIC int +xreap_agextent_iter( + struct xreap_state *rs, + xfs_agblock_t agbno, + xfs_extlen_t *aglenp, + bool crosslinked) +{ + struct xfs_scrub *sc = rs->sc; + xfs_fsblock_t fsbno; + int error = 0; + + fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno); + + /* + * If there are other rmappings, this block is cross linked and must + * not be freed. Remove the reverse mapping and move on. Otherwise, + * we were the only owner of the block, so free the extent, which will + * also remove the rmap. + * + * XXX: XFS doesn't support detecting the case where a single block + * metadata structure is crosslinked with a multi-block structure + * because the buffer cache doesn't detect aliasing problems, so we + * can't fix 100% of crosslinking problems (yet). The verifiers will + * blow on writeout, the filesystem will shut down, and the admin gets + * to run xfs_repair. + */ + if (crosslinked) { + trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp); + + rs->force_roll = true; + return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, + *aglenp, rs->oinfo); + } + + trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp); + + /* + * Invalidate as many buffers as we can, starting at agbno. If this + * function sets *aglenp to zero, the transaction is full of logged + * buffer invalidations, so we need to return early so that we can + * roll and retry. + */ + xreap_agextent_binval(rs, agbno, aglenp); + if (*aglenp == 0) { + ASSERT(xreap_want_roll(rs)); + return 0; + } + + /* Put blocks back on the AGFL one at a time. */ + if (rs->resv == XFS_AG_RESV_AGFL) { + ASSERT(*aglenp == 1); + error = xreap_put_freelist(sc, agbno); + if (error) + return error; + + rs->force_roll = true; + return 0; + } + + /* + * Use deferred frees to get rid of the old btree blocks to try to + * minimize the window in which we could crash and lose the old blocks. + */ + error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo, + rs->resv, true); + if (error) + return error; + + rs->deferred++; + return 0; +} + +/* + * Break an AG metadata extent into sub-extents by fate (crosslinked, not + * crosslinked), and dispose of each sub-extent separately. + */ +STATIC int +xreap_agmeta_extent( + uint64_t fsbno, + uint64_t len, + void *priv) +{ + struct xreap_state *rs = priv; + struct xfs_scrub *sc = rs->sc; + xfs_agblock_t agbno = fsbno; + xfs_agblock_t agbno_next = agbno + len; + int error = 0; + + ASSERT(len <= XFS_MAX_BMBT_EXTLEN); + ASSERT(sc->ip == NULL); + + while (agbno < agbno_next) { + xfs_extlen_t aglen; + bool crosslinked; + + error = xreap_agextent_select(rs, agbno, agbno_next, + &crosslinked, &aglen); + if (error) + return error; + + error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked); + if (error) + return error; + + if (xreap_want_defer_finish(rs)) { + error = xrep_defer_finish(sc); + if (error) + return error; + xreap_defer_finish_reset(rs); + } else if (xreap_want_roll(rs)) { + error = xrep_roll_ag_trans(sc); + if (error) + return error; + xreap_reset(rs); + } + + agbno += aglen; + } + + return 0; +} + +/* Dispose of every block of every AG metadata extent in the bitmap. */ +int +xrep_reap_agblocks( + struct xfs_scrub *sc, + struct xagb_bitmap *bitmap, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) +{ + struct xreap_state rs = { + .sc = sc, + .oinfo = oinfo, + .resv = type, + }; + int error; + + ASSERT(xfs_has_rmapbt(sc->mp)); + ASSERT(sc->ip == NULL); + + error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs); + if (error) + return error; + + if (xreap_dirty(&rs)) + return xrep_defer_finish(sc); + + return 0; +} diff --git a/fs/xfs/scrub/reap.h b/fs/xfs/scrub/reap.h new file mode 100644 index 000000000000..fe24626af164 --- /dev/null +++ b/fs/xfs/scrub/reap.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022-2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <[email protected]> + */ +#ifndef __XFS_SCRUB_REAP_H__ +#define __XFS_SCRUB_REAP_H__ + +int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap, + const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type); + +#endif /* __XFS_SCRUB_REAP_H__ */ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index ac6d8803e660..83a1b1437a4f 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -26,6 +26,7 @@ #include "xfs_ag_resv.h" #include "xfs_quota.h" #include "xfs_qm.h" +#include "xfs_defer.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -166,6 +167,56 @@ xrep_roll_ag_trans( return 0; } +/* Finish all deferred work attached to the repair transaction. */ +int +xrep_defer_finish( + struct xfs_scrub *sc) +{ + int error; + + /* + * Keep the AG header buffers locked while we complete deferred work + * items. Ensure that both AG buffers are dirty and held when we roll + * the transaction so that they move forward in the log without losing + * the bli (and hence the bli type) when the transaction commits. + * + * Normal code would never hold clean buffers across a roll, but repair + * needs both buffers to maintain a total lock on the AG. + */ + if (sc->sa.agi_bp) { + xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM); + xfs_trans_bhold(sc->tp, sc->sa.agi_bp); + } + + if (sc->sa.agf_bp) { + xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM); + xfs_trans_bhold(sc->tp, sc->sa.agf_bp); + } + + /* + * Finish all deferred work items. We still hold the AG header buffers + * locked regardless of whether or not that succeeds. On failure, the + * buffers will be released during teardown on our way out of the + * kernel. If successful, join the buffers to the new transaction + * and move on. + */ + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + + /* + * Release the hold that we set above because defer_finish won't do + * that for us. The defer roll code redirties held buffers after each + * roll, so the AG header buffers should be ready for logging. + */ + if (sc->sa.agi_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp); + if (sc->sa.agf_bp) + xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp); + + return 0; +} + /* * Does the given AG have enough space to rebuild a btree? Neither AG * reservation can be critical, and we must have enough space (factoring @@ -297,89 +348,6 @@ xrep_calc_ag_resblks( return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz)); } -/* Allocate a block in an AG. */ -int -xrep_alloc_ag_block( - struct xfs_scrub *sc, - const struct xfs_owner_info *oinfo, - xfs_fsblock_t *fsbno, - enum xfs_ag_resv_type resv) -{ - struct xfs_alloc_arg args = {0}; - xfs_agblock_t bno; - int error; - - switch (resv) { - case XFS_AG_RESV_AGFL: - case XFS_AG_RESV_RMAPBT: - error = xfs_alloc_get_freelist(sc->sa.pag, sc->tp, - sc->sa.agf_bp, &bno, 1); - if (error) - return error; - if (bno == NULLAGBLOCK) - return -ENOSPC; - xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false); - *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno); - if (resv == XFS_AG_RESV_RMAPBT) - xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno); - return 0; - default: - break; - } - - args.tp = sc->tp; - args.mp = sc->mp; - args.pag = sc->sa.pag; - args.oinfo = *oinfo; - args.minlen = 1; - args.maxlen = 1; - args.prod = 1; - args.resv = resv; - - error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno); - if (error) - return error; - if (args.fsbno == NULLFSBLOCK) - return -ENOSPC; - ASSERT(args.len == 1); - *fsbno = args.fsbno; - - return 0; -} - -/* Initialize a new AG btree root block with zero entries. */ -int -xrep_init_btblock( - struct xfs_scrub *sc, - xfs_fsblock_t fsb, - struct xfs_buf **bpp, - xfs_btnum_t btnum, - const struct xfs_buf_ops *ops) -{ - struct xfs_trans *tp = sc->tp; - struct xfs_mount *mp = sc->mp; - struct xfs_buf *bp; - int error; - - trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb), - XFS_FSB_TO_AGBNO(mp, fsb), btnum); - - ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno); - error = xfs_trans_get_buf(tp, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0, - &bp); - if (error) - return error; - xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno); - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); - xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1); - bp->b_ops = ops; - *bpp = bp; - - return 0; -} - /* * Reconstructing per-AG Btrees * @@ -404,91 +372,8 @@ xrep_init_btblock( * sublist. As with the other btrees we subtract sublist from bitmap, and the * result (since the rmapbt lives in the free space) are the blocks from the * old rmapbt. - * - * Disposal of Blocks from Old per-AG Btrees - * - * Now that we've constructed a new btree to replace the damaged one, we want - * to dispose of the blocks that (we think) the old btree was using. - * Previously, we used the rmapbt to collect the extents (bitmap) with the - * rmap owner corresponding to the tree we rebuilt, collected extents for any - * blocks with the same rmap owner that are owned by another data structure - * (sublist), and subtracted sublist from bitmap. In theory the extents - * remaining in bitmap are the old btree's blocks. - * - * Unfortunately, it's possible that the btree was crosslinked with other - * blocks on disk. The rmap data can tell us if there are multiple owners, so - * if the rmapbt says there is an owner of this block other than @oinfo, then - * the block is crosslinked. Remove the reverse mapping and continue. - * - * If there is one rmap record, we can free the block, which removes the - * reverse mapping but doesn't add the block to the free space. Our repair - * strategy is to hope the other metadata objects crosslinked on this block - * will be rebuilt (atop different blocks), thereby removing all the cross - * links. - * - * If there are no rmap records at all, we also free the block. If the btree - * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't - * supposed to be a rmap record and everything is ok. For other btrees there - * had to have been an rmap entry for the block to have ended up on @bitmap, - * so if it's gone now there's something wrong and the fs will shut down. - * - * Note: If there are multiple rmap records with only the same rmap owner as - * the btree we're trying to rebuild and the block is indeed owned by another - * data structure with the same rmap owner, then the block will be in sublist - * and therefore doesn't need disposal. If there are multiple rmap records - * with only the same rmap owner but the block is not owned by something with - * the same rmap owner, the block will be freed. - * - * The caller is responsible for locking the AG headers for the entire rebuild - * operation so that nothing else can sneak in and change the AG state while - * we're not looking. We also assume that the caller already invalidated any - * buffers associated with @bitmap. */ -static int -xrep_invalidate_block( - uint64_t fsbno, - void *priv) -{ - struct xfs_scrub *sc = priv; - struct xfs_buf *bp; - int error; - - /* Skip AG headers and post-EOFS blocks */ - if (!xfs_verify_fsbno(sc->mp, fsbno)) - return 0; - - error = xfs_buf_incore(sc->mp->m_ddev_targp, - XFS_FSB_TO_DADDR(sc->mp, fsbno), - XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp); - if (error) - return 0; - - xfs_trans_bjoin(sc->tp, bp); - xfs_trans_binval(sc->tp, bp); - return 0; -} - -/* - * Invalidate buffers for per-AG btree blocks we're dumping. This function - * is not intended for use with file data repairs; we have bunmapi for that. - */ -int -xrep_invalidate_blocks( - struct xfs_scrub *sc, - struct xbitmap *bitmap) -{ - /* - * For each block in each extent, see if there's an incore buffer for - * exactly that block; if so, invalidate it. The buffer cache only - * lets us look for one buffer at a time, so we have to look one block - * at a time. Avoid invalidating AG headers and post-EOFS blocks - * because we never own those; and if we can't TRYLOCK the buffer we - * assume it's owned by someone else. - */ - return xbitmap_walk_bits(bitmap, xrep_invalidate_block, sc); -} - /* Ensure the freelist is the correct size. */ int xrep_fix_freelist( @@ -507,155 +392,6 @@ xrep_fix_freelist( can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK); } -/* Information about reaping extents after a repair. */ -struct xrep_reap_state { - struct xfs_scrub *sc; - - /* Reverse mapping owner and metadata reservation type. */ - const struct xfs_owner_info *oinfo; - enum xfs_ag_resv_type resv; -}; - -/* - * Put a block back on the AGFL. - */ -STATIC int -xrep_put_freelist( - struct xfs_scrub *sc, - xfs_agblock_t agbno) -{ - struct xfs_buf *agfl_bp; - int error; - - /* Make sure there's space on the freelist. */ - error = xrep_fix_freelist(sc, true); - if (error) - return error; - - /* - * Since we're "freeing" a lost block onto the AGFL, we have to - * create an rmap for the block prior to merging it or else other - * parts will break. - */ - error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1, - &XFS_RMAP_OINFO_AG); - if (error) - return error; - - /* Put the block on the AGFL. */ - error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); - if (error) - return error; - - error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, - agfl_bp, agbno, 0); - if (error) - return error; - xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, - XFS_EXTENT_BUSY_SKIP_DISCARD); - - return 0; -} - -/* Dispose of a single block. */ -STATIC int -xrep_reap_block( - uint64_t fsbno, - void *priv) -{ - struct xrep_reap_state *rs = priv; - struct xfs_scrub *sc = rs->sc; - struct xfs_btree_cur *cur; - struct xfs_buf *agf_bp = NULL; - xfs_agblock_t agbno; - bool has_other_rmap; - int error; - - ASSERT(sc->ip != NULL || - XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno); - trace_xrep_dispose_btree_extent(sc->mp, - XFS_FSB_TO_AGNO(sc->mp, fsbno), - XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1); - - agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); - ASSERT(XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno); - - /* - * If we are repairing per-inode metadata, we need to read in the AGF - * buffer. Otherwise, we're repairing a per-AG structure, so reuse - * the AGF buffer that the setup functions already grabbed. - */ - if (sc->ip) { - error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); - if (error) - return error; - } else { - agf_bp = sc->sa.agf_bp; - } - cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, sc->sa.pag); - - /* Can we find any other rmappings? */ - error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo, - &has_other_rmap); - xfs_btree_del_cursor(cur, error); - if (error) - goto out_free; - - /* - * If there are other rmappings, this block is cross linked and must - * not be freed. Remove the reverse mapping and move on. Otherwise, - * we were the only owner of the block, so free the extent, which will - * also remove the rmap. - * - * XXX: XFS doesn't support detecting the case where a single block - * metadata structure is crosslinked with a multi-block structure - * because the buffer cache doesn't detect aliasing problems, so we - * can't fix 100% of crosslinking problems (yet). The verifiers will - * blow on writeout, the filesystem will shut down, and the admin gets - * to run xfs_repair. - */ - if (has_other_rmap) - error = xfs_rmap_free(sc->tp, agf_bp, sc->sa.pag, agbno, - 1, rs->oinfo); - else if (rs->resv == XFS_AG_RESV_AGFL) - error = xrep_put_freelist(sc, agbno); - else - error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, rs->oinfo, - rs->resv); - if (agf_bp != sc->sa.agf_bp) - xfs_trans_brelse(sc->tp, agf_bp); - if (error) - return error; - - if (sc->ip) - return xfs_trans_roll_inode(&sc->tp, sc->ip); - return xrep_roll_ag_trans(sc); - -out_free: - if (agf_bp != sc->sa.agf_bp) - xfs_trans_brelse(sc->tp, agf_bp); - return error; -} - -/* Dispose of every block of every extent in the bitmap. */ -int -xrep_reap_extents( - struct xfs_scrub *sc, - struct xbitmap *bitmap, - const struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type type) -{ - struct xrep_reap_state rs = { - .sc = sc, - .oinfo = oinfo, - .resv = type, - }; - - ASSERT(xfs_has_rmapbt(sc->mp)); - - return xbitmap_walk_bits(bitmap, xrep_reap_block, &rs); -} - /* * Finding per-AG Btree Roots for AGF/AGI Reconstruction * diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index dce791c679ee..45478040a19d 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -15,28 +15,28 @@ static inline int xrep_notsupported(struct xfs_scrub *sc) #ifdef CONFIG_XFS_ONLINE_REPAIR +/* + * This is the maximum number of deferred extent freeing item extents (EFIs) + * that we'll attach to a transaction without rolling the transaction to avoid + * overrunning a tr_itruncate reservation. + */ +#define XREP_MAX_ITRUNCATE_EFIS (128) + + /* Repair helpers */ int xrep_attempt(struct xfs_scrub *sc); void xrep_failure(struct xfs_mount *mp); int xrep_roll_ag_trans(struct xfs_scrub *sc); +int xrep_defer_finish(struct xfs_scrub *sc); bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks, enum xfs_ag_resv_type type); xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc); -int xrep_alloc_ag_block(struct xfs_scrub *sc, - const struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno, - enum xfs_ag_resv_type resv); -int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb, - struct xfs_buf **bpp, xfs_btnum_t btnum, - const struct xfs_buf_ops *ops); struct xbitmap; struct xagb_bitmap; int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink); -int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xbitmap *btlist); -int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist, - const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type); struct xrep_find_ag_btree { /* in: rmap owner of the btree we're looking for */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index b3894daeb86a..73cf1002bd94 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -729,9 +729,8 @@ TRACE_EVENT(xchk_refcount_incorrect, #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) DECLARE_EVENT_CLASS(xrep_extent_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, agbno, len), + TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(pag, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) @@ -739,8 +738,8 @@ DECLARE_EVENT_CLASS(xrep_extent_class, __field(xfs_extlen_t, len) ), TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; __entry->agbno = agbno; __entry->len = len; ), @@ -752,12 +751,45 @@ DECLARE_EVENT_CLASS(xrep_extent_class, ); #define DEFINE_REPAIR_EXTENT_EVENT(name) \ DEFINE_EVENT(xrep_extent_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - xfs_agblock_t agbno, xfs_extlen_t len), \ - TP_ARGS(mp, agno, agbno, len)) -DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent); + TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(pag, agbno, len)) +DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent); +DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent); +DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval); DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert); +DECLARE_EVENT_CLASS(xrep_reap_find_class, + TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, + bool crosslinked), + TP_ARGS(pag, agbno, len, crosslinked), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(bool, crosslinked) + ), + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->crosslinked = crosslinked; + ), + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x crosslinked %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->crosslinked ? 1 : 0) +); +#define DEFINE_REPAIR_REAP_FIND_EVENT(name) \ +DEFINE_EVENT(xrep_reap_find_class, name, \ + TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, \ + bool crosslinked), \ + TP_ARGS(pag, agbno, len, crosslinked)) +DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select); + DECLARE_EVENT_CLASS(xrep_rmap_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len, @@ -827,28 +859,6 @@ TRACE_EVENT(xrep_refcount_extent_fn, __entry->refcount) ) -TRACE_EVENT(xrep_init_btblock, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, - xfs_btnum_t btnum), - TP_ARGS(mp, agno, agbno, btnum), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(uint32_t, btnum) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->btnum = btnum; - ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x btree %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS)) -) TRACE_EVENT(xrep_findroot_block, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, uint32_t magic, uint16_t level), diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 15d1e5a7c2d3..fa392c43ba16 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -481,7 +481,8 @@ _xfs_buf_obj_cmp( * reallocating a busy extent. Skip this buffer and * continue searching for an exact match. */ - ASSERT(bp->b_flags & XBF_STALE); + if (!(map->bm_flags & XBM_LIVESCAN)) + ASSERT(bp->b_flags & XBF_STALE); return 1; } return 0; @@ -559,6 +560,10 @@ xfs_buf_find_lock( * intact here. */ if (bp->b_flags & XBF_STALE) { + if (flags & XBF_LIVESCAN) { + xfs_buf_unlock(bp); + return -ENOENT; + } ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); bp->b_flags &= _XBF_KMEM | _XBF_PAGES; bp->b_ops = NULL; @@ -682,6 +687,8 @@ xfs_buf_get_map( int error; int i; + if (flags & XBF_LIVESCAN) + cmap.bm_flags |= XBM_LIVESCAN; for (i = 0; i < nmaps; i++) cmap.bm_len += map[i].bm_len; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 549c60942208..df8f47953bb4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -44,6 +44,11 @@ struct xfs_buf; #define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */ /* flags used only as arguments to access routines */ +/* + * Online fsck is scanning the buffer cache for live buffers. Do not warn + * about length mismatches during lookups and do not return stale buffers. + */ +#define XBF_LIVESCAN (1u << 28) #define XBF_INCORE (1u << 29)/* lookup only, return if found in cache */ #define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */ #define XBF_UNMAPPED (1u << 31)/* do not map the buffer */ @@ -67,6 +72,7 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ /* The following interface flags should never be set */ \ + { XBF_LIVESCAN, "LIVESCAN" }, \ { XBF_INCORE, "INCORE" }, \ { XBF_TRYLOCK, "TRYLOCK" }, \ { XBF_UNMAPPED, "UNMAPPED" } @@ -114,8 +120,15 @@ typedef struct xfs_buftarg { struct xfs_buf_map { xfs_daddr_t bm_bn; /* block number for I/O */ int bm_len; /* size of I/O */ + unsigned int bm_flags; }; +/* + * Online fsck is scanning the buffer cache for live buffers. Do not warn + * about length mismatches during lookups and do not return stale buffers. + */ +#define XBM_LIVESCAN (1U << 0) + #define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; |