aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_ag.c1
-rw-r--r--fs/xfs/libxfs/xfs_ag.h4
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c49
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h8
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h1
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c199
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h31
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c163
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.h6
-rw-r--r--fs/xfs/libxfs/xfs_shared.h10
-rw-r--r--fs/xfs/scrub/agb_bitmap.h5
-rw-r--r--fs/xfs/scrub/bitmap.c14
-rw-r--r--fs/xfs/scrub/bitmap.h2
-rw-r--r--fs/xfs/scrub/bmap.c2
-rw-r--r--fs/xfs/scrub/common.c5
-rw-r--r--fs/xfs/scrub/common.h1
-rw-r--r--fs/xfs/scrub/newbt.c12
-rw-r--r--fs/xfs/scrub/newbt.h7
-rw-r--r--fs/xfs/scrub/reap.c2
-rw-r--r--fs/xfs/scrub/repair.c59
-rw-r--r--fs/xfs/scrub/repair.h12
-rw-r--r--fs/xfs/scrub/rmap.c11
-rw-r--r--fs/xfs/scrub/rmap_repair.c1697
-rw-r--r--fs/xfs/scrub/scrub.c6
-rw-r--r--fs/xfs/scrub/scrub.h4
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h80
-rw-r--r--fs/xfs/xfs_stats.c3
-rw-r--r--fs/xfs/xfs_stats.h1
31 files changed, 2336 insertions, 70 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 9a6574da8de5..6de02b2573c3 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -201,6 +201,7 @@ xfs-y += $(addprefix scrub/, \
reap.o \
refcount_repair.o \
repair.o \
+ rmap_repair.o \
)
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 32d80a76440c..d728709054b2 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -417,6 +417,7 @@ xfs_initialize_perag(
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
+ xfs_hooks_init(&pag->pag_rmap_update_hooks);
#endif /* __KERNEL__ */
error = xfs_buf_cache_init(&pag->pag_bcache);
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 29bfa6273dec..35de09a2516c 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -90,6 +90,7 @@ struct xfs_perag {
uint8_t pagf_repair_bno_level;
uint8_t pagf_repair_cnt_level;
uint8_t pagf_repair_refcount_level;
+ uint8_t pagf_repair_rmap_level;
#endif
spinlock_t pag_state_lock;
@@ -119,6 +120,9 @@ struct xfs_perag {
* inconsistencies.
*/
struct xfs_defer_drain pag_intents_drain;
+
+ /* Hook to feed rmapbt updates to an active online repair. */
+ struct xfs_hooks pag_rmap_update_hooks;
#endif /* __KERNEL__ */
};
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2e0546eb8010..06e329221cd5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4913,7 +4913,7 @@ xfs_bmap_del_extent_delay(
XFS_STATS_INC(mp, xs_del_exlist);
- isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ isrt = xfs_ifork_is_realtime(ip, whichfork);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got->br_startoff + got->br_blockcount;
da_old = startblockval(got->br_startblock);
@@ -5149,7 +5149,7 @@ xfs_bmap_del_extent_real(
return -ENOSPC;
*logflagsp = XFS_ILOG_CORE;
- if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+ if (xfs_ifork_is_realtime(ip, whichfork)) {
if (!(bflags & XFS_BMAPI_REMAP)) {
error = xfs_rtfree_blocks(tp, del->br_startblock,
del->br_blockcount);
@@ -5396,7 +5396,7 @@ __xfs_bunmapi(
return 0;
}
XFS_STATS_INC(mp, xs_blk_unmap);
- isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ isrt = xfs_ifork_is_realtime(ip, whichfork);
end = start + len;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
@@ -6379,3 +6379,46 @@ xfs_bunmapi_range(
out:
return error;
}
+
+struct xfs_bmap_query_range {
+ xfs_bmap_query_range_fn fn; /* caller's callback, run once per valid record */
+ void *priv; /* caller's private pointer, handed back to fn */
+};
+
+/*
+ * Btree query callback: decode the ondisk bmbt record into incore form,
+ * validate it against the inode fork that the cursor points at, and hand
+ * good records to the caller's function. If validation fails, the btree
+ * is marked sick and the complaint helper's return value is passed back.
+ */
+STATIC int
+xfs_bmap_query_range_helper(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_rec	*rec,
+	void				*priv)
+{
+	struct xfs_bmap_query_range	*query = priv;
+	struct xfs_inode		*ip = cur->bc_ino.ip;
+	int				whichfork = cur->bc_ino.whichfork;
+	struct xfs_bmbt_irec		irec;
+	xfs_failaddr_t			fa;
+
+	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+	fa = xfs_bmap_validate_extent(ip, whichfork, &irec);
+	if (!fa)
+		return query->fn(cur, &irec, query->priv);
+
+	xfs_btree_mark_sick(cur);
+	return xfs_bmap_complain_bad_rec(ip, whichfork, fa, &irec);
+}
+
+/*
+ * Walk every record in the bmap btree behind @cur, calling @fn with @priv
+ * for each mapping that passes validation.
+ */
+int
+xfs_bmap_query_all(
+	struct xfs_btree_cur		*cur,
+	xfs_bmap_query_range_fn		fn,
+	void				*priv)
+{
+	struct xfs_bmap_query_range	query;
+
+	query.fn = fn;
+	query.priv = priv;
+
+	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f6b73f1bad5f..10b85865204d 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -280,4 +280,12 @@ extern struct kmem_cache *xfs_bmap_intent_cache;
int __init xfs_bmap_intent_init_cache(void);
void xfs_bmap_intent_destroy_cache(void);
+typedef int (*xfs_bmap_query_range_fn)(
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *rec,
+ void *priv);
+
+int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
+ void *priv);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 83194edcb0ba..3ab0ea133557 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -813,3 +813,12 @@ xfs_iext_count_upgrade(
return 0;
}
+
+/*
+ * Decide if mappings in the given fork of this file are on the realtime
+ * device. Only realtime inodes qualify, and never for their attr fork.
+ */
+bool
+xfs_ifork_is_realtime(
+	struct xfs_inode	*ip,
+	int			whichfork)
+{
+	if (!XFS_IS_REALTIME_INODE(ip))
+		return false;
+
+	return whichfork != XFS_ATTR_FORK;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 96303249d28a..bd53eb951b65 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -260,6 +260,7 @@ int xfs_iext_count_may_overflow(struct xfs_inode *ip, int whichfork,
int nr_to_add);
int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip,
uint nr_to_add);
+bool xfs_ifork_is_realtime(struct xfs_inode *ip, int whichfork);
/* returns true if the fork has extents but they are not read in yet. */
static inline bool xfs_need_iread_extents(const struct xfs_ifork *ifp)
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index c9e12533c813..ef16f6f9cef6 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -215,10 +215,10 @@ xfs_rmap_btrec_to_irec(
/* Simple checks for rmap records. */
xfs_failaddr_t
xfs_rmap_check_irec(
- struct xfs_btree_cur *cur,
+ struct xfs_perag *pag,
const struct xfs_rmap_irec *irec)
{
- struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_mount *mp = pag->pag_mount;
bool is_inode;
bool is_unwritten;
bool is_bmbt;
@@ -233,8 +233,8 @@ xfs_rmap_check_irec(
return __this_address;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock,
- irec->rm_blockcount))
+ if (!xfs_verify_agbext(pag, irec->rm_startblock,
+ irec->rm_blockcount))
return __this_address;
}
@@ -269,6 +269,16 @@ xfs_rmap_check_irec(
return NULL;
}
+/* Check an rmap record against the perag attached to this btree cursor. */
+static inline xfs_failaddr_t
+xfs_rmap_check_btrec(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*irec)
+{
+	struct xfs_perag		*pag;
+
+	/* In-memory rmap cursors keep their perag in bc_mem, not bc_ag. */
+	pag = xfs_btree_is_mem_rmap(cur->bc_ops) ? cur->bc_mem.pag :
+						    cur->bc_ag.pag;
+
+	return xfs_rmap_check_irec(pag, irec);
+}
+
static inline int
xfs_rmap_complain_bad_rec(
struct xfs_btree_cur *cur,
@@ -277,9 +287,13 @@ xfs_rmap_complain_bad_rec(
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_warn(mp,
- "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ if (xfs_btree_is_mem_rmap(cur->bc_ops))
+ xfs_warn(mp,
+ "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
+ else
+ xfs_warn(mp,
+ "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
+ cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
@@ -307,7 +321,7 @@ xfs_rmap_get_rec(
fa = xfs_rmap_btrec_to_irec(rec, irec);
if (!fa)
- fa = xfs_rmap_check_irec(cur, irec);
+ fa = xfs_rmap_check_btrec(cur, irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, irec);
@@ -807,6 +821,86 @@ out_error:
return error;
}
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of rmapbt live updates. If
+ * the compiler supports jump labels, the static branch will be replaced by a
+ * nop sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_rmap_hooks_switch);
+
+/*
+ * Switch off the rmap hook switch. Per the note on the switch definition,
+ * callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling this.
+ */
+void
+xfs_rmap_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_rmap_hooks_switch);
+}
+
+/*
+ * Switch on the rmap hook switch so that xfs_rmap_update_hook starts
+ * calling registered hooks. The same locking restriction applies.
+ */
+void
+xfs_rmap_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_rmap_hooks_switch);
+}
+
+/*
+ * Tell any hooks registered on this AG about a reverse mapping update.
+ * Nothing is called unless the rmap hook switch is on and a perag was
+ * supplied. The intent type is passed as the hook action and the rest of
+ * the update is packaged into a struct xfs_rmap_update_params.
+ */
+static inline void
+xfs_rmap_update_hook(
+	struct xfs_trans		*tp,
+	struct xfs_perag		*pag,
+	enum xfs_rmap_intent_type	op,
+	xfs_agblock_t			startblock,
+	xfs_extlen_t			blockcount,
+	bool				unwritten,
+	const struct xfs_owner_info	*oinfo)
+{
+	struct xfs_rmap_update_params	params;
+
+	if (!xfs_hooks_switched_on(&xfs_rmap_hooks_switch))
+		return;
+
+	/* Snapshot the update; the owner info is copied by value. */
+	params = (struct xfs_rmap_update_params) {
+		.startblock	= startblock,
+		.blockcount	= blockcount,
+		.unwritten	= unwritten,
+		.oinfo		= *oinfo,
+	};
+
+	if (!pag)
+		return;
+
+	xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &params);
+}
+
+/*
+ * Call the specified function during a reverse mapping update. The hook is
+ * added to this AG's pag_rmap_update_hooks chain, and whatever
+ * xfs_hooks_add() returns is passed straight back to the caller.
+ */
+int
+xfs_rmap_hook_add(
+ struct xfs_perag *pag,
+ struct xfs_rmap_hook *hook)
+{
+ return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+}
+
+/*
+ * Stop calling the specified function during a reverse mapping update by
+ * removing the hook from this AG's pag_rmap_update_hooks chain.
+ */
+void
+xfs_rmap_hook_del(
+ struct xfs_perag *pag,
+ struct xfs_rmap_hook *hook)
+{
+ xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+}
+
+/*
+ * Configure rmap update hook functions. @mod_fn is the notifier function
+ * handed to xfs_hook_setup() for the embedded generic hook.
+ */
+void
+xfs_rmap_hook_setup(
+ struct xfs_rmap_hook *hook,
+ notifier_fn_t mod_fn)
+{
+ xfs_hook_setup(&hook->rmap_hook, mod_fn);
+}
+#else
+# define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while (0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Remove a reference to an extent in the rmap btree.
*/
@@ -827,7 +921,7 @@ xfs_rmap_free(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
-
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo);
error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -1079,6 +1173,7 @@ xfs_rmap_alloc(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo);
error = xfs_rmap_map(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -2404,15 +2499,12 @@ xfs_rmap_map_raw(
{
struct xfs_owner_info oinfo;
- oinfo.oi_owner = rmap->rm_owner;
- oinfo.oi_offset = rmap->rm_offset;
- oinfo.oi_flags = 0;
- if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
- oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
- if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
- oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+ xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
+ rmap->rm_flags);
- if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
+ XFS_RMAP_UNWRITTEN)) ||
+ XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
return xfs_rmap_map(cur, rmap->rm_startblock,
rmap->rm_blockcount,
rmap->rm_flags & XFS_RMAP_UNWRITTEN,
@@ -2442,7 +2534,7 @@ xfs_rmap_query_range_helper(
fa = xfs_rmap_btrec_to_irec(rec, &irec);
if (!fa)
- fa = xfs_rmap_check_irec(cur, &irec);
+ fa = xfs_rmap_check_btrec(cur, &irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, &irec);
@@ -2497,6 +2589,38 @@ xfs_rmap_finish_one_cleanup(
xfs_trans_brelse(tp, agbp);
}
+/*
+ * Commit an rmap operation into the tree behind @rcur. The intent type
+ * selects the map, unmap, or convert routine (shared or not). The convert
+ * operations are passed the inverse of @unwritten. An unrecognised intent
+ * type trips an assert and returns -EFSCORRUPTED.
+ */
+int
+__xfs_rmap_finish_intent(
+	struct xfs_btree_cur		*rcur,
+	enum xfs_rmap_intent_type	op,
+	xfs_agblock_t			bno,
+	xfs_extlen_t			len,
+	const struct xfs_owner_info	*oinfo,
+	bool				unwritten)
+{
+	int				error;
+
+	switch (op) {
+	case XFS_RMAP_ALLOC:
+	case XFS_RMAP_MAP:
+		error = xfs_rmap_map(rcur, bno, len, unwritten, oinfo);
+		break;
+	case XFS_RMAP_MAP_SHARED:
+		error = xfs_rmap_map_shared(rcur, bno, len, unwritten, oinfo);
+		break;
+	case XFS_RMAP_FREE:
+	case XFS_RMAP_UNMAP:
+		error = xfs_rmap_unmap(rcur, bno, len, unwritten, oinfo);
+		break;
+	case XFS_RMAP_UNMAP_SHARED:
+		error = xfs_rmap_unmap_shared(rcur, bno, len, unwritten,
+				oinfo);
+		break;
+	case XFS_RMAP_CONVERT:
+		error = xfs_rmap_convert(rcur, bno, len, !unwritten, oinfo);
+		break;
+	case XFS_RMAP_CONVERT_SHARED:
+		error = xfs_rmap_convert_shared(rcur, bno, len, !unwritten,
+				oinfo);
+		break;
+	default:
+		ASSERT(0);
+		error = -EFSCORRUPTED;
+		break;
+	}
+
+	return error;
+}
+
/*
* Process one of the deferred rmap operations. We pass back the
* btree cursor to maintain our lock on the rmapbt between calls.
@@ -2563,39 +2687,14 @@ xfs_rmap_finish_one(
unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock);
- switch (ri->ri_type) {
- case XFS_RMAP_ALLOC:
- case XFS_RMAP_MAP:
- error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
- break;
- case XFS_RMAP_MAP_SHARED:
- error = xfs_rmap_map_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
- break;
- case XFS_RMAP_FREE:
- case XFS_RMAP_UNMAP:
- error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
- break;
- case XFS_RMAP_UNMAP_SHARED:
- error = xfs_rmap_unmap_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
- break;
- case XFS_RMAP_CONVERT:
- error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount,
- !unwritten, &oinfo);
- break;
- case XFS_RMAP_CONVERT_SHARED:
- error = xfs_rmap_convert_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, !unwritten, &oinfo);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- }
+ error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, &oinfo, unwritten);
+ if (error)
+ return error;
- return error;
+ xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, unwritten, &oinfo);
+ return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 3c98d9d50afb..9d01fe689497 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -186,6 +186,10 @@ void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
+int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,
+ enum xfs_rmap_intent_type op, xfs_agblock_t bno,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo,
+ bool unwritten);
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
@@ -195,7 +199,7 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
union xfs_btree_rec;
xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
-xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur,
+xfs_failaddr_t xfs_rmap_check_irec(struct xfs_perag *pag,
const struct xfs_rmap_irec *irec);
int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
@@ -235,4 +239,29 @@ extern struct kmem_cache *xfs_rmap_intent_cache;
int __init xfs_rmap_intent_init_cache(void);
void xfs_rmap_intent_destroy_cache(void);
+/*
+ * Parameters for tracking reverse mapping changes. The hook function arg
+ * parameter is enum xfs_rmap_intent_type, and the rest is below.
+ */
+struct xfs_rmap_update_params {
+ xfs_agblock_t startblock; /* first AG block of the extent being updated */
+ xfs_extlen_t blockcount; /* length of that extent, in blocks */
+ struct xfs_owner_info oinfo; /* by-value copy of the update's owner info */
+ bool unwritten; /* unwritten flag as passed by the rmap update caller */
+};
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+
+struct xfs_rmap_hook {
+ struct xfs_hook rmap_hook; /* generic hook linked into a perag's rmap update chain */
+};
+
+void xfs_rmap_hook_disable(void);
+void xfs_rmap_hook_enable(void);
+
+int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn);
+#endif
+
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 0751268c102c..9e759efa81cc 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -22,6 +22,8 @@
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
static struct kmem_cache *xfs_rmapbt_cur_cache;
@@ -342,7 +344,18 @@ xfs_rmapbt_verify(
level = be16_to_cpu(block->bb_level);
if (pag && xfs_perag_initialised_agf(pag)) {
- if (level >= pag->pagf_rmap_level)
+ unsigned int maxlevel = pag->pagf_rmap_level;
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ /*
+ * Online repair could be rewriting the reverse mapping btree, so
+ * we'll validate against the larger of the two tree heights while
+ * this is going on.
+ */
+ maxlevel = max_t(unsigned int, maxlevel,
+ pag->pagf_repair_rmap_level);
+#endif
+ if (level >= maxlevel)
return __this_address;
} else if (level >= mp->m_rmap_maxlevels)
return __this_address;
@@ -530,6 +543,151 @@ xfs_rmapbt_init_cursor(
return cur;
}
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+/*
+ * Compute how many entries fit in @blocklen bytes of an in-memory rmap
+ * btree block. Leaf entries are records; node entries are a low/high key
+ * pair plus one 64-bit pointer.
+ */
+static inline unsigned int
+xfs_rmapbt_mem_block_maxrecs(
+	unsigned int		blocklen,
+	bool			leaf)
+{
+	size_t			entry_len;
+
+	if (leaf)
+		entry_len = sizeof(struct xfs_rmap_rec);
+	else
+		entry_len = 2 * sizeof(struct xfs_rmap_key) + sizeof(__be64);
+
+	return blocklen / entry_len;
+}
+
+/*
+ * Validate an in-memory rmap btree block: magic number, fsblock v5 header,
+ * tree level, and finally the record count via the generic in-memory block
+ * verifier. Callers are allowed to generate an in-memory btree even if the
+ * ondisk feature is not enabled.
+ */
+static xfs_failaddr_t
+xfs_rmapbt_mem_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	xfs_failaddr_t		failaddr;
+	unsigned int		blocklen;
+	unsigned int		height;
+
+	if (!xfs_verify_magic(bp, block->bb_magic))
+		return __this_address;
+
+	failaddr = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+	if (failaddr)
+		return failaddr;
+
+	height = be16_to_cpu(block->bb_level);
+	if (height >= xfs_rmapbt_maxlevels_ondisk())
+		return __this_address;
+
+	/* Level zero blocks hold records; higher levels hold keys and ptrs. */
+	blocklen = XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
+	return xfs_btree_memblock_verify(bp,
+			xfs_rmapbt_mem_block_maxrecs(blocklen, height == 0));
+}
+
+/*
+ * Read/write verifier for in-memory rmap btree blocks. A block that fails
+ * the structure check is reported with -EFSCORRUPTED.
+ */
+static void
+xfs_rmapbt_mem_rw_verify(
+	struct xfs_buf		*bp)
+{
+	xfs_failaddr_t		failaddr;
+
+	failaddr = xfs_rmapbt_mem_verify(bp);
+	if (!failaddr)
+		return;
+
+	xfs_verifier_error(bp, -EFSCORRUPTED, failaddr);
+}
+
+/*
+ * Buffer ops for in-memory rmap btree blocks. Skip crc checks on in-memory
+ * btrees to save time; the read and write verifiers both run the same
+ * structure check.
+ */
+static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
+ .name = "xfs_rmapbt_mem",
+ .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
+ .verify_read = xfs_rmapbt_mem_rw_verify,
+ .verify_write = xfs_rmapbt_mem_rw_verify,
+ .verify_struct = xfs_rmapbt_mem_verify,
+};
+
+/*
+ * Btree operations for the in-memory rmap btree. Record, key, and ordering
+ * callbacks are the xfs_rmapbt_* functions; cursor duplication, root,
+ * block allocation, and min/max record callbacks are the xfbtree_* ones.
+ */
+const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
+ .name = "mem_rmap",
+ .type = XFS_BTREE_TYPE_MEM,
+ .geom_flags = XFS_BTGEO_OVERLAPPING,
+
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ /* Overlapping btree; 2 keys per pointer. */
+ .key_len = 2 * sizeof(struct xfs_rmap_key),
+ .ptr_len = XFS_BTREE_LONG_PTR_LEN,
+
+ .lru_refs = XFS_RMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_rmap_mem_2),
+
+ .dup_cursor = xfbtree_dup_cursor,
+ .set_root = xfbtree_set_root,
+ .alloc_block = xfbtree_alloc_block,
+ .free_block = xfbtree_free_block,
+ .get_minrecs = xfbtree_get_minrecs,
+ .get_maxrecs = xfbtree_get_maxrecs,
+ .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfbtree_init_ptr_from_cur,
+ .key_diff = xfs_rmapbt_key_diff,
+ .buf_ops = &xfs_rmapbt_mem_buf_ops,
+ .diff_two_keys = xfs_rmapbt_diff_two_keys,
+ .keys_inorder = xfs_rmapbt_keys_inorder,
+ .recs_inorder = xfs_rmapbt_recs_inorder,
+ .keys_contiguous = xfs_rmapbt_keys_contiguous,
+};
+
+/*
+ * Create a cursor for an in-memory rmap btree. The cursor takes its level
+ * count from the xfbtree and holds its own reference to @pag.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_mem_cursor(
+	struct xfs_perag	*pag,
+	struct xfs_trans	*tp,
+	struct xfbtree		*xfbt)
+{
+	struct xfs_btree_cur	*cur;
+
+	cur = xfs_btree_alloc_cursor(pag->pag_mount, tp, &xfs_rmapbt_mem_ops,
+			xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);
+
+	cur->bc_nlevels = xfbt->nlevels;
+	cur->bc_mem.xfbtree = xfbt;
+	cur->bc_mem.pag = xfs_perag_hold(pag);
+
+	return cur;
+}
+
+/*
+ * Create an in-memory rmap btree. @agno is recorded as the xfbtree owner
+ * before the tree is initialised on @btp with the in-memory rmap btree
+ * ops; the result of xfbtree_init() is returned unchanged.
+ */
+int
+xfs_rmapbt_mem_init(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ struct xfs_buftarg *btp,
+ xfs_agnumber_t agno)
+{
+ xfbt->owner = agno;
+ return xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops);
+}
+
+/*
+ * Compute the max possible height for reverse mapping btrees in memory,
+ * i.e. how tall the tree could become if the entire AG were filled with
+ * rmap records.
+ */
+static unsigned int
+xfs_rmapbt_mem_maxlevels(void)
+{
+	const unsigned int	blocklen = XFBNO_BLOCKSIZE -
+					   XFS_BTREE_LBLOCK_CRC_LEN;
+	unsigned int		minrecs[2] = {
+		xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2,
+		xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2,
+	};
+
+	return xfs_btree_compute_maxlevels(minrecs,
+			XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
+}
+#else
+# define xfs_rmapbt_mem_maxlevels() (0)
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
+
/*
* Install a new reverse mapping btree root. Caller is responsible for
* invalidating and freeing the old btree blocks.
@@ -600,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void)
* like if it consumes almost all the blocks in the AG due to maximal
* sharing factor.
*/
- return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
+ return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
+ xfs_rmapbt_mem_maxlevels());
}
/* Compute the maximum height of an rmap btree. */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 27536d7e14aa..eb90d89e8086 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -10,6 +10,7 @@ struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
struct xbtree_afakeroot;
+struct xfbtree;
/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
@@ -62,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void);
int __init xfs_rmapbt_init_cur_cache(void);
void xfs_rmapbt_destroy_cur_cache(void);
+struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag,
+ struct xfs_trans *tp, struct xfbtree *xfbtree);
+int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
+ struct xfs_buftarg *btp, xfs_agnumber_t agno);
+
#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 6b8bc276d461..cab49e7116ec 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -51,6 +51,7 @@ extern const struct xfs_btree_ops xfs_finobt_ops;
extern const struct xfs_btree_ops xfs_bmbt_ops;
extern const struct xfs_btree_ops xfs_refcountbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_ops;
+extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
{
@@ -87,6 +88,15 @@ static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops)
return ops == &xfs_rmapbt_ops;
}
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+/* True if @ops is the in-memory rmap btree ops table (xfs_rmapbt_mem_ops). */
+static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_rmapbt_mem_ops;
+}
+#else
+# define xfs_btree_is_mem_rmap(...) (false)
+#endif
+
/* log size calculation functions */
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
diff --git a/fs/xfs/scrub/agb_bitmap.h b/fs/xfs/scrub/agb_bitmap.h
index ed08f76ff4f3..e488e1f4f63d 100644
--- a/fs/xfs/scrub/agb_bitmap.h
+++ b/fs/xfs/scrub/agb_bitmap.h
@@ -65,4 +65,9 @@ int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap,
int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap,
struct xfs_btree_cur *cur);
+/* Count the set regions in the 32-bit bitmap embedded in this AG block bitmap. */
+static inline uint32_t xagb_bitmap_count_set_regions(struct xagb_bitmap *b)
+{
+ return xbitmap32_count_set_regions(&b->agbitmap);
+}
+
#endif /* __XFS_SCRUB_AGB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index 1449bb5262d9..0cb8d43912a8 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -566,3 +566,17 @@ xbitmap32_test(
*len = bn->bn_start - start;
return false;
}
+
+/*
+ * Count the number of set regions in this bitmap, i.e. the number of
+ * extents that the bitmap iterator visits.
+ */
+uint32_t
+xbitmap32_count_set_regions(
+	struct xbitmap32	*bitmap)
+{
+	struct xbitmap32_node	*node;
+	uint32_t		count = 0;
+
+	for_each_xbitmap32_extent(node, bitmap) {
+		++count;
+	}
+
+	return count;
+}
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index 2df8911606d6..710c1ac5e323 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -62,4 +62,6 @@ int xbitmap32_walk(struct xbitmap32 *bitmap, xbitmap32_walk_fn fn,
bool xbitmap32_empty(struct xbitmap32 *bitmap);
bool xbitmap32_test(struct xbitmap32 *bitmap, uint32_t start, uint32_t *len);
+uint32_t xbitmap32_count_set_regions(struct xbitmap32 *bitmap);
+
#endif /* __XFS_SCRUB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index b169cddde6da..24a15bf784f1 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -924,7 +924,7 @@ xchk_bmap(
if (!ifp)
return -ENOENT;
- info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
+ info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
info.whichfork = whichfork;
info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
info.sc = sc;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 70746a7db954..abff79a77c72 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -460,7 +460,7 @@ xchk_perag_read_headers(
* Grab the AG headers for the attached perag structure and wait for pending
* intents to drain.
*/
-static int
+int
xchk_perag_drain_and_lock(
struct xfs_scrub *sc)
{
@@ -1309,6 +1309,9 @@ xchk_fsgates_enable(
if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
xfs_dir_hook_enable();
+ if (scrub_fsgates & XCHK_FSGATES_RMAP)
+ xfs_rmap_hook_enable();
+
sc->flags |= scrub_fsgates;
}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 529a510dc76f..89f7bbec887e 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -134,6 +134,7 @@ int xchk_setup_nlinks(struct xfs_scrub *sc);
void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
struct xchk_ag *sa);
+int xchk_perag_drain_and_lock(struct xfs_scrub *sc);
/*
* Grab all AG resources, treating the inability to grab the perag structure as
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 608d7ab01d89..4a0271123d94 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -239,7 +239,11 @@ xrep_newbt_alloc_ag_blocks(
xrep_newbt_validate_ag_alloc_hint(xnr);
- error = xfs_alloc_vextent_near_bno(&args, xnr->alloc_hint);
+ if (xnr->alloc_vextent)
+ error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
+ else
+ error = xfs_alloc_vextent_near_bno(&args,
+ xnr->alloc_hint);
if (error)
return error;
if (args.fsbno == NULLFSBLOCK)
@@ -309,7 +313,11 @@ xrep_newbt_alloc_file_blocks(
xrep_newbt_validate_file_alloc_hint(xnr);
- error = xfs_alloc_vextent_start_ag(&args, xnr->alloc_hint);
+ if (xnr->alloc_vextent)
+ error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
+ else
+ error = xfs_alloc_vextent_start_ag(&args,
+ xnr->alloc_hint);
if (error)
return error;
if (args.fsbno == NULLFSBLOCK)
diff --git a/fs/xfs/scrub/newbt.h b/fs/xfs/scrub/newbt.h
index 89f8e3970b1f..3d804d31af24 100644
--- a/fs/xfs/scrub/newbt.h
+++ b/fs/xfs/scrub/newbt.h
@@ -6,6 +6,8 @@
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__
+struct xfs_alloc_arg;
+
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@@ -28,6 +30,11 @@ struct xrep_newbt_resv {
struct xrep_newbt {
struct xfs_scrub *sc;
+ /* Custom allocation function, or NULL for xfs_alloc_vextent */
+ int (*alloc_vextent)(struct xfs_scrub *sc,
+ struct xfs_alloc_arg *args,
+ xfs_fsblock_t alloc_hint);
+
/* List of extents that we've reserved. */
struct list_head resv_list;
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index f99eca799809..0252a3b5b65a 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -114,7 +114,7 @@ xreap_put_freelist(
int error;
/* Make sure there's space on the freelist. */
- error = xrep_fix_freelist(sc, true);
+ error = xrep_fix_freelist(sc, 0);
if (error)
return error;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index d1a21f380abe..f43dce771cdd 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -31,12 +31,14 @@
#include "xfs_error.h"
#include "xfs_reflink.h"
#include "xfs_health.h"
+#include "xfs_buf_mem.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/stats.h"
+#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -401,7 +403,7 @@ xrep_calc_ag_resblks(
int
xrep_fix_freelist(
struct xfs_scrub *sc,
- bool can_shrink)
+ int alloc_flags)
{
struct xfs_alloc_arg args = {0};
@@ -411,8 +413,7 @@ xrep_fix_freelist(
args.alignment = 1;
args.pag = sc->sa.pag;
- return xfs_alloc_fix_freelist(&args,
- can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
+ return xfs_alloc_fix_freelist(&args, alloc_flags);
}
/*
@@ -1148,3 +1149,55 @@ xrep_metadata_inode_forks(
return 0;
}
+
+/*
+ * Set up an in-memory buffer cache so that we can use the xfbtree. Allocating
+ * a shmem file might take locks, so we cannot be in transaction context. Park
+ * our resources in the scrub context and let the teardown function take care
+ * of them at the right time.
+ *
+ * @descr is passed through to xmbuf_alloc(), and the new buffer target is
+ * stored in sc->xmbtp. Returns whatever xmbuf_alloc() returns.
+ */
+int
+xrep_setup_xfbtree(
+ struct xfs_scrub *sc,
+ const char *descr)
+{
+ ASSERT(sc->tp == NULL);
+
+ return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
+}
+
+/*
+ * Create a dummy transaction for use in a live update hook function. This
+ * function MUST NOT be called from regular repair code because the current
+ * process' transaction is saved via the cookie.
+ *
+ * On success the caller's journal_info stays parked in @cookiep until the
+ * dummy transaction is cancelled. On failure it is put back and the cookie
+ * is cleared.
+ */
+int
+xrep_trans_alloc_hook_dummy(
+	struct xfs_mount	*mp,
+	void			**cookiep,
+	struct xfs_trans	**tpp)
+{
+	int			error;
+
+	/* Stash the running transaction context and clear it. */
+	*cookiep = current->journal_info;
+	current->journal_info = NULL;
+
+	error = xfs_trans_alloc_empty(mp, tpp);
+	if (error) {
+		/* Allocation failed, so undo the stash. */
+		current->journal_info = *cookiep;
+		*cookiep = NULL;
+	}
+
+	return error;
+}
+
+/*
+ * Cancel a dummy transaction used by a live update hook function. The
+ * transaction is cancelled first, then the journal_info saved in the cookie
+ * is restored and the cookie is cleared.
+ */
+void
+xrep_trans_cancel_hook_dummy(
+ void **cookiep,
+ struct xfs_trans *tp)
+{
+ xfs_trans_cancel(tp);
+ current->journal_info = *cookiep;
+ *cookiep = NULL;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 2ff2bb79c540..dd1c89e8714c 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -51,7 +51,7 @@ struct xbitmap;
struct xagb_bitmap;
struct xfsb_bitmap;
-int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
+int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags);
struct xrep_find_ag_btree {
/* in: rmap owner of the btree we're looking for */
@@ -81,11 +81,14 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
# define xrep_ino_dqattach(sc) (0)
#endif /* CONFIG_XFS_QUOTA */
+int xrep_setup_xfbtree(struct xfs_scrub *sc, const char *descr);
+
int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
xfs_extnum_t nextents);
int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
+int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
@@ -111,6 +114,7 @@ int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
int xrep_allocbt(struct xfs_scrub *sc);
int xrep_iallocbt(struct xfs_scrub *sc);
+int xrep_rmapbt(struct xfs_scrub *sc);
int xrep_refcountbt(struct xfs_scrub *sc);
int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
@@ -136,6 +140,10 @@ int xrep_quotacheck(struct xfs_scrub *sc);
int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc);
+int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
+ struct xfs_trans **tpp);
+void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
+
#else
#define xrep_ino_dqattach(sc) (0)
@@ -177,6 +185,7 @@ xrep_setup_nothing(
return 0;
}
#define xrep_setup_ag_allocbt xrep_setup_nothing
+#define xrep_setup_ag_rmapbt xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@@ -190,6 +199,7 @@ xrep_setup_nothing(
#define xrep_agi xrep_notsupported
#define xrep_allocbt xrep_notsupported
#define xrep_iallocbt xrep_notsupported
+#define xrep_rmapbt xrep_notsupported
#define xrep_refcountbt xrep_notsupported
#define xrep_inode xrep_notsupported
#define xrep_bmap_data xrep_notsupported
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 5afe6650ed6c..ba5bbc3fb754 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -25,6 +25,7 @@
#include "scrub/btree.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
+#include "scrub/repair.h"
/*
* Set us up to scrub reverse mapping btrees.
@@ -36,6 +37,14 @@ xchk_setup_ag_rmapbt(
if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+ if (xchk_could_repair(sc)) {
+ int error;
+
+ error = xrep_setup_ag_rmapbt(sc);
+ if (error)
+ return error;
+ }
+
return xchk_setup_ag_btree(sc, false);
}
@@ -349,7 +358,7 @@ xchk_rmapbt_rec(
struct xfs_rmap_irec irec;
if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
- xfs_rmap_check_irec(bs->cur, &irec) != NULL) {
+ xfs_rmap_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
new file mode 100644
index 000000000000..e8e07b683eab
--- /dev/null
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -0,0 +1,1697 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_btree_staging.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_ag.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/agb_bitmap.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/newbt.h"
+#include "scrub/reap.h"
+
+/*
+ * Reverse Mapping Btree Repair
+ * ============================
+ *
+ * This is the most involved of all the AG space btree rebuilds. Everywhere
+ * else in XFS we lock inodes and then AG data structures, but generating the
+ * list of rmap records requires that we be able to scan both block mapping
+ * btrees of every inode in the filesystem to see if it owns any extents in
+ * this AG. We can't tolerate any inode updates while we do this, so we
+ * freeze the filesystem to lock everyone else out, and grant ourselves
+ * special privileges to run transactions with regular background reclamation
+ * turned off.
+ *
+ * We also have to be very careful not to allow inode reclaim to start a
+ * transaction because all transactions (other than our own) will block.
+ * Deferred inode inactivation helps us out there.
+ *
+ * I) Reverse mappings for all non-space metadata and file data are collected
+ * according to the following algorithm:
+ *
+ * 1. For each fork of each inode:
+ * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary.
+ * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate
+ * bmaps into rmap records (see 1.4). Set bits in BMBIT for each btree
+ * block.
+ * 1.3. If the incore extent map is loaded but the fork is in btree format,
+ * just visit the bmbt blocks to set the corresponding BMBIT areas.
+ * 1.4. From the incore extent map, accumulate each bmap that falls into our
+ * target AG. Remember, multiple bmap records can map to a single rmap
+ * record, so we cannot simply emit rmap records 1:1.
+ * 1.5. Emit rmap records for each extent in BMBIT and free it.
+ * 2. Create bitmaps INOBIT and ICHUNKBIT.
+ * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT,
+ * and set bits in INOBIT for each btree block. If the inobt has no records
+ * at all, we must be careful to record its root in INOBIT.
+ * 4. For each block in the finobt, set the corresponding INOBIT area.
+ * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them.
+ * 6. Create bitmaps REFCBIT and COWBIT.
+ * 7. For each CoW staging extent in the refcountbt, set the corresponding
+ * areas in COWBIT.
+ * 8. For each block in the refcountbt, set the corresponding REFCBIT area.
+ * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them.
+ * A. Emit rmap for the AG headers.
+ * B. Emit rmap for the log, if there is one.
+ *
+ * II) The rmapbt shape and space metadata rmaps are computed as follows:
+ *
+ * 1. Count the rmaps collected in the previous step. (= NR)
+ * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB)
+ * 3. Reserve RMB blocks through the newbt using the allocator in normap mode.
+ * 4. Create bitmap AGBIT.
+ * 5. For each reservation in the newbt, set the corresponding areas in AGBIT.
+ * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT.
+ * 7. Count the extents in AGBIT. (= AGNR)
+ * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB')
+ * 9. If RMB' > RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB',
+ * and clear AGBIT. Go to step 5.
+ * A. Emit rmaps for each extent in AGBIT.
+ *
+ * III) The rmapbt is constructed and set in place as follows:
+ *
+ * 1. Sort the rmap records.
+ * 2. Bulk load the rmaps.
+ *
+ * IV) Reap the old btree blocks.
+ *
+ * 1. Create a bitmap OLDRMBIT.
+ * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT.
+ * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT.
+ * 4. Reap the extents corresponding to the set areas in OLDRMBIT. These are
+ * the parts of the AG that the rmap didn't find during its scan of the
+ * primary metadata and aren't known to be in the free space, which implies
+ * that they were the old rmapbt blocks.
+ * 5. Commit.
+ *
+ * We use the 'xrep_rmap' prefix for all the rmap functions.
+ */
+
+/*
+ * Context for collecting rmaps. Allocated (zeroed) by xrep_setup_ag_rmapbt,
+ * which also stores a pointer to it in sc->buf.
+ */
+struct xrep_rmap {
+ /* new rmapbt information */
+ struct xrep_newbt new_btree;
+
+ /* lock for the xfbtree and xfile */
+ struct mutex lock;
+
+ /* rmap records generated from primary metadata */
+ struct xfbtree rmap_btree;
+
+ struct xfs_scrub *sc; /* scrub context this repair belongs to */
+
+ /* in-memory btree cursor for the xfs_btree_bload iteration */
+ struct xfs_btree_cur *mcur;
+
+ /* Hooks into rmap update code. */
+ struct xfs_rmap_hook rhook;
+
+ /* inode scan cursor */
+ struct xchk_iscan iscan;
+
+ /* Number of non-freespace records found. */
+ unsigned long long nr_records;
+
+ /* bnobt/cntbt contribution to btreeblks */
+ xfs_agblock_t freesp_btblocks;
+
+ /* old agf_rmap_blocks counter */
+ unsigned int old_rmapbt_fsbcount;
+};
+
+/*
+ * Set us up to repair reverse mapping btrees.
+ *
+ * Enables the XCHK_FSGATES_RMAP gate, calls xrep_setup_xfbtree with a
+ * description string, and allocates a zeroed struct xrep_rmap that is linked
+ * to @sc through sc->buf. Returns 0, the error from xrep_setup_xfbtree, or
+ * -ENOMEM if the context cannot be allocated.
+ */
+int
+xrep_setup_ag_rmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rmap *rr;
+ char *descr;
+ int error;
+
+ xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
+
+ descr = xchk_xfile_ag_descr(sc, "reverse mapping records");
+ error = xrep_setup_xfbtree(sc, descr);
+ kfree(descr);
+ if (error)
+ return error;
+
+ rr = kzalloc(sizeof(struct xrep_rmap), XCHK_GFP_FLAGS);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->sc = sc;
+ sc->buf = rr;
+ return 0;
+}
+
+/*
+ * Make sure there's nothing funny about this mapping.
+ *
+ * Returns -EFSCORRUPTED if xfs_rmap_check_irec rejects @rec or if the bnobt
+ * cursor in sc->sa.bno_cur reports anything other than "no records" for the
+ * extent, i.e. some part of it is recorded as free space. Returns the lookup
+ * error if the bnobt query fails, and 0 otherwise.
+ */
+STATIC int
+xrep_rmap_check_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (xfs_rmap_check_irec(sc->sa.pag, rec) != NULL)
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
+ rec->rm_blockcount, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
+/*
+ * Store a reverse-mapping record in the in-memory rmap btree.
+ *
+ * Returns early if the scrub should terminate, and returns -EFSCORRUPTED if
+ * the inode scan has already been marked aborted. The insertion and the
+ * xfbtree commit both happen with rr->lock held. If the insertion fails the
+ * xfbtree changes are cancelled; if either step fails the inode scan is
+ * marked aborted before the error is returned.
+ */
+static inline int
+xrep_rmap_stash(
+ struct xrep_rmap *rr,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount,
+ uint64_t owner,
+ uint64_t offset,
+ unsigned int flags)
+{
+ struct xfs_rmap_irec rmap = {
+ .rm_startblock = startblock,
+ .rm_blockcount = blockcount,
+ .rm_owner = owner,
+ .rm_offset = offset,
+ .rm_flags = flags,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *mcur;
+ int error = 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap);
+
+ mutex_lock(&rr->lock);
+ mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, &rr->rmap_btree);
+ error = xfs_rmap_map_raw(mcur, &rmap);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(&rr->rmap_btree, sc->tp);
+ if (error)
+ goto out_abort;
+
+ mutex_unlock(&rr->lock);
+ return 0;
+
+out_cancel:
+ xfbtree_trans_cancel(&rr->rmap_btree, sc->tp);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
+ return error;
+}
+
+struct xrep_rmap_stash_run { /* context for the xrep_rmap_stash_run callback */
+ struct xrep_rmap *rr; /* repair context that receives the rmaps */
+ uint64_t owner; /* rmap owner used for every emitted record */
+ unsigned int rmap_flags; /* rmap flags used for every emitted record */
+};
+
+static int
+xrep_rmap_stash_run(
+ uint32_t start, /* first block of the run */
+ uint32_t len, /* number of blocks in the run */
+ void *priv) /* struct xrep_rmap_stash_run */
+{
+ struct xrep_rmap_stash_run *rsr = priv;
+ struct xrep_rmap *rr = rsr->rr;
+
+ /* Bitmap-derived rmaps are always stashed with an offset of zero. */
+ return xrep_rmap_stash(rr, start, len, rsr->owner, 0, rsr->rmap_flags);
+}
+
+/*
+ * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
+ * that the ranges are in units of FS blocks.
+ *
+ * The rmap owner is taken from @oinfo. The XFS_OWNER_INFO_ATTR_FORK and
+ * XFS_OWNER_INFO_BMBT_BLOCK owner flags are translated into XFS_RMAP_ATTR_FORK
+ * and XFS_RMAP_BMBT_BLOCK respectively. Returns whatever the bitmap walk
+ * returns.
+ */
+STATIC int
+xrep_rmap_stash_bitmap(
+ struct xrep_rmap *rr,
+ struct xagb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xrep_rmap_stash_run rsr = {
+ .rr = rr,
+ .owner = oinfo->oi_owner,
+ .rmap_flags = 0,
+ };
+
+ if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+ rsr.rmap_flags |= XFS_RMAP_ATTR_FORK;
+ if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+ rsr.rmap_flags |= XFS_RMAP_BMBT_BLOCK;
+
+ return xagb_bitmap_walk(bitmap, xrep_rmap_stash_run, &rsr);
+}
+
+/* Section (I): Finding all file and bmbt extents. */
+
+/*
+ * Context for accumulating rmaps for an inode fork. One of these is set up
+ * on the stack by xrep_rmap_scan_ifork for each fork that gets scanned.
+ */
+struct xrep_rmap_ifork {
+ /*
+ * Accumulate rmap data here to turn multiple adjacent bmaps into a
+ * single rmap.
+ */
+ struct xfs_rmap_irec accum;
+
+ /* Bitmap of bmbt blocks in this AG. */
+ struct xagb_bitmap bmbt_blocks;
+
+ struct xrep_rmap *rr; /* repair context that receives the rmaps */
+
+ /* Which inode fork? */
+ int whichfork;
+};
+
+/*
+ * Stash an rmap that we accumulated while walking an inode fork. Does
+ * nothing and returns 0 if nothing has been accumulated yet, i.e. the
+ * accumulator's block count is zero.
+ */
+STATIC int
+xrep_rmap_stash_accumulated(
+ struct xrep_rmap_ifork *rf)
+{
+ if (rf->accum.rm_blockcount == 0)
+ return 0;
+
+ return xrep_rmap_stash(rf->rr, rf->accum.rm_startblock,
+ rf->accum.rm_blockcount, rf->accum.rm_owner,
+ rf->accum.rm_offset, rf->accum.rm_flags);
+}
+
+/*
+ * Accumulate a bmbt record.
+ *
+ * Mappings that start in a different AG than the one being repaired are
+ * ignored. A mapping that directly follows the accumulated one in both file
+ * offset and AG block number, with the same rmap flags, extends the
+ * accumulator; anything else causes the accumulator to be stashed and then
+ * restarted from @rec. @cur is not used here; xrep_rmap_scan_iext passes
+ * NULL for it.
+ */
+STATIC int
+xrep_rmap_visit_bmbt(
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *rec,
+ void *priv)
+{
+ struct xrep_rmap_ifork *rf = priv;
+ struct xfs_mount *mp = rf->rr->sc->mp;
+ struct xfs_rmap_irec *accum = &rf->accum;
+ xfs_agblock_t agbno;
+ unsigned int rmap_flags = 0;
+ int error;
+
+ if (XFS_FSB_TO_AGNO(mp, rec->br_startblock) !=
+ rf->rr->sc->sa.pag->pag_agno)
+ return 0;
+
+ agbno = XFS_FSB_TO_AGBNO(mp, rec->br_startblock);
+ if (rf->whichfork == XFS_ATTR_FORK)
+ rmap_flags |= XFS_RMAP_ATTR_FORK;
+ if (rec->br_state == XFS_EXT_UNWRITTEN)
+ rmap_flags |= XFS_RMAP_UNWRITTEN;
+
+ /* If this bmap is adjacent to the previous one, just add it. */
+ if (accum->rm_blockcount > 0 &&
+ rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
+ agbno == accum->rm_startblock + accum->rm_blockcount &&
+ rmap_flags == accum->rm_flags) {
+ accum->rm_blockcount += rec->br_blockcount;
+ return 0;
+ }
+
+ /* Otherwise stash the old rmap and start accumulating a new one. */
+ error = xrep_rmap_stash_accumulated(rf);
+ if (error)
+ return error;
+
+ accum->rm_startblock = agbno;
+ accum->rm_blockcount = rec->br_blockcount;
+ accum->rm_offset = rec->br_startoff;
+ accum->rm_flags = rmap_flags;
+ return 0;
+}
+
+/*
+ * Add a btree block to the bitmap.
+ *
+ * Levels for which xfs_btree_get_block hands back no buffer are skipped, as
+ * are blocks that live in a different AG than the one being repaired.
+ */
+STATIC int
+xrep_rmap_visit_iroot_btree_block(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xrep_rmap_ifork *rf = priv;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsbno;
+ xfs_agblock_t agbno;
+
+ xfs_btree_get_block(cur, level, &bp);
+ if (!bp)
+ return 0;
+
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
+ if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != rf->rr->sc->sa.pag->pag_agno)
+ return 0;
+
+ agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
+ return xagb_bitmap_set(&rf->bmbt_blocks, agbno, 1);
+}
+
+/*
+ * Iterate a metadata btree rooted in an inode to collect rmap records for
+ * anything in this fork that matches the AG.
+ *
+ * The bmbt_blocks bitmap in @rf is initialized on entry and destroyed before
+ * returning, whether or not an error occurred. Any rmap still sitting in the
+ * accumulator is stashed after the btree block rmaps have been emitted.
+ */
+STATIC int
+xrep_rmap_scan_iroot_btree(
+ struct xrep_rmap_ifork *rf,
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_owner_info oinfo;
+ struct xrep_rmap *rr = rf->rr;
+ int error;
+
+ xagb_bitmap_init(&rf->bmbt_blocks);
+
+ /* Record all the blocks in the btree itself. */
+ error = xfs_btree_visit_blocks(cur, xrep_rmap_visit_iroot_btree_block,
+ XFS_BTREE_VISIT_ALL, rf);
+ if (error)
+ goto out;
+
+ /* Emit rmaps for the btree blocks. */
+ xfs_rmap_ino_bmbt_owner(&oinfo, rf->accum.rm_owner, rf->whichfork);
+ error = xrep_rmap_stash_bitmap(rr, &rf->bmbt_blocks, &oinfo);
+ if (error)
+ goto out;
+
+ /* Stash any remaining accumulated rmaps. */
+ error = xrep_rmap_stash_accumulated(rf);
+out:
+ xagb_bitmap_destroy(&rf->bmbt_blocks);
+ return error;
+}
+
+static inline bool
+is_rt_data_fork(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ /* True only for the data fork of a realtime inode. */
+ return XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK;
+}
+
+/*
+ * Iterate the block mapping btree to collect rmap records for anything in this
+ * fork that matches the AG. Sets @mappings_done to true if we've scanned the
+ * block mappings in this fork.
+ *
+ * The mapping records are only walked here when the fork is not a realtime
+ * fork and its incore extent tree has not been loaded; the btree blocks
+ * themselves are always scanned. Returns 0 or a negative errno.
+ */
+STATIC int
+xrep_rmap_scan_bmbt(
+ struct xrep_rmap_ifork *rf,
+ struct xfs_inode *ip,
+ bool *mappings_done)
+{
+ struct xrep_rmap *rr = rf->rr;
+ struct xfs_btree_cur *cur;
+ struct xfs_ifork *ifp;
+ int error;
+
+ *mappings_done = false;
+ ifp = xfs_ifork_ptr(ip, rf->whichfork);
+ cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, rf->whichfork);
+
+ if (!xfs_ifork_is_realtime(ip, rf->whichfork) &&
+ xfs_need_iread_extents(ifp)) {
+ /*
+ * If the incore extent cache isn't loaded, scan the bmbt for
+ * mapping records. This avoids loading the incore extent
+ * tree, which will increase memory pressure at a time when
+ * we're trying to run as quickly as we possibly can. Ignore
+ * realtime extents.
+ */
+ error = xfs_bmap_query_all(cur, xrep_rmap_visit_bmbt, rf);
+ if (error)
+ goto out_cur;
+
+ *mappings_done = true;
+ }
+
+ /* Scan for the bmbt blocks, which always live on the data device. */
+ error = xrep_rmap_scan_iroot_btree(rf, cur);
+out_cur:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/*
+ * Iterate the in-core extent cache to collect rmap records for anything in
+ * this fork that matches the AG.
+ *
+ * Mappings for which isnullstartblock() is true are skipped. Whatever is left
+ * in the accumulator after the walk is stashed before returning.
+ */
+STATIC int
+xrep_rmap_scan_iext(
+ struct xrep_rmap_ifork *rf,
+ struct xfs_ifork *ifp)
+{
+ struct xfs_bmbt_irec rec;
+ struct xfs_iext_cursor icur;
+ int error;
+
+ for_each_xfs_iext(ifp, &icur, &rec) {
+ if (isnullstartblock(rec.br_startblock))
+ continue;
+ error = xrep_rmap_visit_bmbt(NULL, &rec, rf);
+ if (error)
+ return error;
+ }
+
+ return xrep_rmap_stash_accumulated(rf);
+}
+
+/*
+ * Find all the extents from a given AG in an inode fork.
+ *
+ * A fork that does not exist, or whose format is neither btree nor extents,
+ * contributes nothing. For btree format forks the bmbt is scanned first, and
+ * the incore extent walk is skipped if that scan already covered the
+ * mappings.
+ */
+STATIC int
+xrep_rmap_scan_ifork(
+ struct xrep_rmap *rr,
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xrep_rmap_ifork rf = {
+ .accum = { .rm_owner = ip->i_ino, },
+ .rr = rr,
+ .whichfork = whichfork,
+ };
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ int error = 0;
+
+ if (!ifp)
+ return 0;
+
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ bool mappings_done;
+
+ /*
+ * Scan the bmap btree for data device mappings. This includes
+ * the btree blocks themselves, even if this is a realtime
+ * file.
+ */
+ error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
+ if (error || mappings_done)
+ return error;
+ } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
+ return 0;
+ }
+
+ /* Scan incore extent cache if this isn't a realtime file. */
+ if (xfs_ifork_is_realtime(ip, whichfork))
+ return 0;
+
+ return xrep_rmap_scan_iext(&rf, ifp);
+}
+
+/*
+ * Take ILOCK on a file that we want to scan.
+ *
+ * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
+ * attr bmbt. Otherwise, take ILOCK_SHARED. Returns the lock mode that was
+ * taken so that the caller can drop it later.
+ */
+static inline unsigned int
+xrep_rmap_scan_ilock(
+ struct xfs_inode *ip)
+{
+ uint lock_mode = XFS_ILOCK_SHARED;
+
+ if (xfs_need_iread_extents(&ip->i_df)) {
+ lock_mode = XFS_ILOCK_EXCL;
+ goto lock;
+ }
+
+ if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
+ lock_mode = XFS_ILOCK_EXCL;
+
+lock:
+ xfs_ilock(ip, lock_mode);
+ return lock_mode;
+}
+
+/*
+ * Record reverse mappings for a file.
+ *
+ * Scans the data fork and then the attr fork with the inode locked. The
+ * inode is marked visited in the inode scan only if both fork scans
+ * succeeded; every lock taken here is dropped before returning.
+ */
+STATIC int
+xrep_rmap_scan_inode(
+ struct xrep_rmap *rr,
+ struct xfs_inode *ip)
+{
+ unsigned int lock_mode = 0;
+ int error;
+
+ /*
+ * Directory updates (create/link/unlink/rename) drop the directory's
+ * ILOCK before finishing any rmapbt updates associated with directory
+ * shape changes. For this scan to coordinate correctly with the live
+ * update hook, we must take the only lock (i_rwsem) that is held all
+ * the way to dir op completion. This will get fixed by the parent
+ * pointer patchset.
+ */
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ lock_mode = XFS_IOLOCK_SHARED;
+ xfs_ilock(ip, lock_mode);
+ }
+ lock_mode |= xrep_rmap_scan_ilock(ip);
+
+ /* Check the data fork. */
+ error = xrep_rmap_scan_ifork(rr, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_unlock;
+
+ /* Check the attr fork. */
+ error = xrep_rmap_scan_ifork(rr, ip, XFS_ATTR_FORK);
+ if (error)
+ goto out_unlock;
+
+ /* COW fork extents are "owned" by the refcount btree. */
+
+ xchk_iscan_mark_visited(&rr->iscan, ip);
+out_unlock:
+ xfs_iunlock(ip, lock_mode);
+ return error;
+}
+
+/* Section (I): Find all AG metadata extents except for free space metadata. */
+
+struct xrep_rmap_inodes { /* context for the inobt walk */
+ struct xrep_rmap *rr; /* repair context */
+ struct xagb_bitmap inobt_blocks; /* INOBIT: inode btree blocks */
+ struct xagb_bitmap ichunk_blocks; /* ICHUNKBIT: inode chunk blocks */
+};
+
+/*
+ * Record inode btree rmaps.
+ *
+ * For each inobt record, the btree blocks on the cursor's current path are
+ * added to the inobt bitmap and the blocks backing the inode chunk are added
+ * to the inode chunk bitmap. Records that fail xfs_inobt_check_irec yield
+ * -EFSCORRUPTED. For sparse chunks, only the regions whose holemask bit is
+ * clear are recorded.
+ */
+STATIC int
+xrep_rmap_walk_inobt(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xrep_rmap_inodes *ri = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_agblock_t agbno;
+ xfs_extlen_t aglen;
+ xfs_agino_t agino;
+ xfs_agino_t iperhole;
+ unsigned int i;
+ int error;
+
+ /* Record the inobt blocks. */
+ error = xagb_bitmap_set_btcur_path(&ri->inobt_blocks, cur);
+ if (error)
+ return error;
+
+ xfs_inobt_btrec_to_irec(mp, rec, &irec);
+ if (xfs_inobt_check_irec(cur->bc_ag.pag, &irec) != NULL)
+ return -EFSCORRUPTED;
+
+ agino = irec.ir_startino;
+
+ /* Record a non-sparse inode chunk. */
+ if (!xfs_inobt_issparse(irec.ir_holemask)) {
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+ aglen = max_t(xfs_extlen_t, 1,
+ XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock);
+
+ return xagb_bitmap_set(&ri->ichunk_blocks, agbno, aglen);
+ }
+
+ /*
+ * Iterate each chunk. Each step covers iperhole inodes, which is the
+ * larger of one block's worth of inodes and one holemask bit's worth.
+ */
+ iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
+ XFS_INODES_PER_HOLEMASK_BIT);
+ aglen = iperhole / mp->m_sb.sb_inopblock;
+ for (i = 0, agino = irec.ir_startino;
+ i < XFS_INOBT_HOLEMASK_BITS;
+ i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
+ /* Skip holes. */
+ if (irec.ir_holemask & (1 << i))
+ continue;
+
+ /* Record the inode chunk otherwise. */
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+ error = xagb_bitmap_set(&ri->ichunk_blocks, agbno, aglen);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+/*
+ * Collect rmaps for the blocks containing inode btrees and the inode chunks.
+ *
+ * Inode btree blocks are stashed with XFS_RMAP_OINFO_INOBT and inode chunk
+ * blocks with XFS_RMAP_OINFO_INODES. Both temporary bitmaps are destroyed
+ * before returning, on success and on error.
+ */
+STATIC int
+xrep_rmap_find_inode_rmaps(
+ struct xrep_rmap *rr)
+{
+ struct xrep_rmap_inodes ri = {
+ .rr = rr,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ xagb_bitmap_init(&ri.inobt_blocks);
+ xagb_bitmap_init(&ri.ichunk_blocks);
+
+ /*
+ * Iterate every record in the inobt so we can capture all the inode
+ * chunks and the blocks in the inobt itself.
+ */
+ error = xfs_btree_query_all(sc->sa.ino_cur, xrep_rmap_walk_inobt, &ri);
+ if (error)
+ goto out_bitmap;
+
+ /*
+ * Note that if there are zero records in the inobt then query_all does
+ * nothing and we have to account the empty inobt root manually.
+ */
+ if (xagb_bitmap_empty(&ri.ichunk_blocks)) {
+ struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
+
+ error = xagb_bitmap_set(&ri.inobt_blocks,
+ be32_to_cpu(agi->agi_root), 1);
+ if (error)
+ goto out_bitmap;
+ }
+
+ /* Scan the finobt too. */
+ if (xfs_has_finobt(sc->mp)) {
+ error = xagb_bitmap_set_btblocks(&ri.inobt_blocks,
+ sc->sa.fino_cur);
+ if (error)
+ goto out_bitmap;
+ }
+
+ /* Generate rmaps for everything. */
+ error = xrep_rmap_stash_bitmap(rr, &ri.inobt_blocks,
+ &XFS_RMAP_OINFO_INOBT);
+ if (error)
+ goto out_bitmap;
+ error = xrep_rmap_stash_bitmap(rr, &ri.ichunk_blocks,
+ &XFS_RMAP_OINFO_INODES);
+
+out_bitmap:
+ xagb_bitmap_destroy(&ri.inobt_blocks);
+ xagb_bitmap_destroy(&ri.ichunk_blocks);
+ return error;
+}
+
+/*
+ * Record a CoW staging extent in the bitmap passed via @priv. Records that
+ * fail the refcount domain check, or that are not in the CoW domain, yield
+ * -EFSCORRUPTED.
+ */
+STATIC int
+xrep_rmap_walk_cowblocks(
+ struct xfs_btree_cur *cur,
+ const struct xfs_refcount_irec *irec,
+ void *priv)
+{
+ struct xagb_bitmap *bitmap = priv;
+
+ if (!xfs_refcount_check_domain(irec) ||
+ irec->rc_domain != XFS_REFC_DOMAIN_COW)
+ return -EFSCORRUPTED;
+
+ return xagb_bitmap_set(bitmap, irec->rc_startblock, irec->rc_blockcount);
+}
+
+/*
+ * Collect rmaps for the blocks containing the refcount btree, and all CoW
+ * staging extents.
+ *
+ * Does nothing on filesystems without reflink. CoW staging extents are
+ * stashed with XFS_RMAP_OINFO_COW and refcount btree blocks with
+ * XFS_RMAP_OINFO_REFC; both temporary bitmaps are destroyed before returning.
+ */
+STATIC int
+xrep_rmap_find_refcount_rmaps(
+ struct xrep_rmap *rr)
+{
+ struct xagb_bitmap refcountbt_blocks; /* REFCBIT */
+ struct xagb_bitmap cow_blocks; /* COWBIT */
+ struct xfs_refcount_irec low = {
+ .rc_startblock = 0,
+ .rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ struct xfs_refcount_irec high = {
+ .rc_startblock = -1U,
+ .rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ if (!xfs_has_reflink(sc->mp))
+ return 0;
+
+ xagb_bitmap_init(&refcountbt_blocks);
+ xagb_bitmap_init(&cow_blocks);
+
+ /* refcountbt */
+ error = xagb_bitmap_set_btblocks(&refcountbt_blocks, sc->sa.refc_cur);
+ if (error)
+ goto out_bitmap;
+
+ /* Collect rmaps for CoW staging extents. */
+ error = xfs_refcount_query_range(sc->sa.refc_cur, &low, &high,
+ xrep_rmap_walk_cowblocks, &cow_blocks);
+ if (error)
+ goto out_bitmap;
+
+ /* Generate rmaps for everything. */
+ error = xrep_rmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
+ if (error)
+ goto out_bitmap;
+ error = xrep_rmap_stash_bitmap(rr, &refcountbt_blocks,
+ &XFS_RMAP_OINFO_REFC);
+
+out_bitmap:
+ xagb_bitmap_destroy(&cow_blocks);
+ xagb_bitmap_destroy(&refcountbt_blocks);
+ return error;
+}
+
+/*
+ * Generate a single XFS_RMAP_OWN_FS rmap for the AG headers, covering
+ * everything from the superblock's block through the AGFL's block inclusive.
+ */
+STATIC int
+xrep_rmap_find_agheader_rmaps(
+ struct xrep_rmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+
+ /* Create a record for the AG sb->agfl. */
+ return xrep_rmap_stash(rr, XFS_SB_BLOCK(sc->mp),
+ XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
+ XFS_RMAP_OWN_FS, 0, 0);
+}
+
+/*
+ * Generate rmaps for the log, if it's in this AG. The record starts at the
+ * AG block derived from sb_logstart, is sb_logblocks long, and is owned by
+ * XFS_RMAP_OWN_LOG.
+ */
+STATIC int
+xrep_rmap_find_log_rmaps(
+ struct xrep_rmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+
+ if (!xfs_ag_contains_log(sc->mp, sc->sa.pag->pag_agno))
+ return 0;
+
+ return xrep_rmap_stash(rr,
+ XFS_FSB_TO_AGBNO(sc->mp, sc->mp->m_sb.sb_logstart),
+ sc->mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
+}
+
+/*
+ * Check and count all the records that we gathered. Each record is passed
+ * through xrep_rmap_check_mapping; only records that pass are counted in
+ * rr->nr_records, and the first failure is returned to stop the walk.
+ */
+STATIC int
+xrep_rmap_check_record(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rmap *rr = priv;
+ int error;
+
+ error = xrep_rmap_check_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ rr->nr_records++;
+ return 0;
+}
+
+/*
+ * Generate all the reverse-mappings for this AG from the primary metadata and
+ * stash them in the in-memory rmap btree. The per-AG metadata is scanned
+ * first, then every inode handed back by the inode scan. Finally, with the
+ * AG headers locked again, every stashed record is checked against the bnobt
+ * and counted into rr->nr_records. The caller must clean up the lists if
+ * anything goes wrong. This implements section (I) above.
+ */
+STATIC int
+xrep_rmap_find_rmaps(
+ struct xrep_rmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xchk_ag *sa = &sc->sa;
+ struct xfs_inode *ip;
+ struct xfs_btree_cur *mcur;
+ int error;
+
+ /* Find all the per-AG metadata. */
+ xrep_ag_btcur_init(sc, &sc->sa);
+
+ error = xrep_rmap_find_inode_rmaps(rr);
+ if (error)
+ goto end_agscan;
+
+ error = xrep_rmap_find_refcount_rmaps(rr);
+ if (error)
+ goto end_agscan;
+
+ error = xrep_rmap_find_agheader_rmaps(rr);
+ if (error)
+ goto end_agscan;
+
+ error = xrep_rmap_find_log_rmaps(rr);
+end_agscan:
+ xchk_ag_btcur_free(&sc->sa);
+ if (error)
+ return error;
+
+ /*
+ * Set up for a potentially lengthy filesystem scan by reducing our
+ * transaction resource usage for the duration. Specifically:
+ *
+ * Unlock the AG header buffers and cancel the transaction to release
+ * the log grant space while we scan the filesystem.
+ *
+ * Create a new empty transaction to eliminate the possibility of the
+ * inode scan deadlocking on cyclical metadata.
+ *
+ * We pass the empty transaction to the file scanning function to avoid
+ * repeatedly cycling empty transactions. This can be done even though
+ * we take the IOLOCK to quiesce the file because empty transactions
+ * do not take sb_internal.
+ */
+ sa->agf_bp = NULL;
+ sa->agi_bp = NULL;
+ xchk_trans_cancel(sc);
+ error = xchk_trans_alloc_empty(sc);
+ if (error)
+ return error;
+
+ /* Iterate all AGs for inodes rmaps. */
+ while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
+ error = xrep_rmap_scan_inode(rr, ip);
+ xchk_irele(sc, ip);
+ if (error)
+ break;
+
+ if (xchk_should_terminate(sc, &error))
+ break;
+ }
+ xchk_iscan_iter_finish(&rr->iscan);
+ if (error)
+ return error;
+
+ /*
+ * Switch out for a real transaction and lock the AG headers in
+ * preparation for building a new tree.
+ */
+ xchk_trans_cancel(sc);
+ error = xchk_setup_fs(sc);
+ if (error)
+ return error;
+ error = xchk_perag_drain_and_lock(sc);
+ if (error)
+ return error;
+
+ /*
+ * If a hook failed to update the in-memory btree, we lack the data to
+ * continue the repair.
+ */
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ /*
+ * Now that we have everything locked again, we need to count the
+ * number of rmap records stashed in the btree. This should reflect
+ * all actively-owned space in the filesystem. At the same time, check
+ * all our records before we start building a new btree, which requires
+ * a bnobt cursor.
+ */
+ mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, &rr->rmap_btree);
+ sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.pag);
+
+ rr->nr_records = 0;
+ error = xfs_rmap_query_all(mcur, xrep_rmap_check_record, rr);
+
+ xfs_btree_del_cursor(sc->sa.bno_cur, error);
+ sc->sa.bno_cur = NULL;
+ xfs_btree_del_cursor(mcur, error);
+
+ return error;
+}
+
+/* Section (II): Reserving space for new rmapbt and setting free space bitmap */
+
+struct xrep_rmap_agfl { /* context for the AGFL walk */
+ struct xagb_bitmap *bitmap; /* bitmap that receives the AGFL blocks */
+ xfs_agnumber_t agno; /* AG number; not read by xrep_rmap_walk_agfl */
+};
+
+/*
+ * Add an AGFL block to the rmap list by setting a single bit for @agbno in
+ * the bitmap carried by @priv. @mp is not used.
+ */
+STATIC int
+xrep_rmap_walk_agfl(
+ struct xfs_mount *mp,
+ xfs_agblock_t agbno,
+ void *priv)
+{
+ struct xrep_rmap_agfl *ra = priv;
+
+ return xagb_bitmap_set(ra->bitmap, agbno, 1);
+}
+
+/*
+ * Run one round of reserving space for the new rmapbt and recomputing the
+ * number of blocks needed to store the previously observed rmapbt records and
+ * the ones we'll create for the free space metadata. When we don't need more
+ * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to
+ * true.
+ *
+ * @blocks_reserved carries the number of blocks reserved so far between
+ * rounds and is updated here. @freesp_blocks is emptied and rebuilt from
+ * scratch on every call. Returns 0 or a negative errno; @done is only
+ * written on success.
+ */
+STATIC int
+xrep_rmap_try_reserve(
+ struct xrep_rmap *rr,
+ struct xfs_btree_cur *rmap_cur,
+ struct xagb_bitmap *freesp_blocks,
+ uint64_t *blocks_reserved,
+ bool *done)
+{
+ struct xrep_rmap_agfl ra = {
+ .bitmap = freesp_blocks,
+ .agno = rr->sc->sa.pag->pag_agno,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ struct xrep_newbt_resv *resv, *n;
+ struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
+ struct xfs_buf *agfl_bp;
+ uint64_t nr_blocks; /* RMB */
+ uint64_t freesp_records;
+ int error;
+
+ /*
+ * We're going to recompute new_btree.bload.nr_blocks at the end of
+ * this function to reflect however many btree blocks we need to store
+ * all the rmap records (including the ones that reflect the changes we
+ * made to support the new rmapbt blocks), so we save the old value
+ * here so we can decide if we've reserved enough blocks.
+ */
+ nr_blocks = rr->new_btree.bload.nr_blocks;
+
+ /*
+ * Make sure we've reserved enough space for the new btree. This can
+ * change the shape of the free space btrees, which can cause secondary
+ * interactions with the rmap records because all three space btrees
+ * have the same rmap owner. We'll account for all that below.
+ */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree,
+ nr_blocks - *blocks_reserved);
+ if (error)
+ return error;
+
+ *blocks_reserved = rr->new_btree.bload.nr_blocks;
+
+ /* Clear everything in the bitmap. */
+ xagb_bitmap_destroy(freesp_blocks);
+
+ /* Set all the bnobt blocks in the bitmap. */
+ sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.pag);
+ error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.bno_cur);
+ xfs_btree_del_cursor(sc->sa.bno_cur, error);
+ sc->sa.bno_cur = NULL;
+ if (error)
+ return error;
+
+ /* Set all the cntbt blocks in the bitmap. */
+ sc->sa.cnt_cur = xfs_cntbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+ sc->sa.pag);
+ error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.cnt_cur);
+ xfs_btree_del_cursor(sc->sa.cnt_cur, error);
+ sc->sa.cnt_cur = NULL;
+ if (error)
+ return error;
+
+ /*
+ * Record our new btreeblks value. At this point the bitmap holds only
+ * the bnobt and cntbt blocks. NOTE(review): the "- 2" presumably
+ * leaves out the two btree root blocks -- confirm.
+ */
+ rr->freesp_btblocks = xagb_bitmap_hweight(freesp_blocks) - 2;
+
+ /* Set all the new rmapbt blocks in the bitmap. */
+ list_for_each_entry_safe(resv, n, &rr->new_btree.resv_list, list) {
+ error = xagb_bitmap_set(freesp_blocks, resv->agbno, resv->len);
+ if (error)
+ return error;
+ }
+
+ /* Set all the AGFL blocks in the bitmap. */
+ error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
+ if (error)
+ return error;
+
+ error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xrep_rmap_walk_agfl, &ra);
+ if (error)
+ return error;
+
+ /* Count the extents in the bitmap. */
+ freesp_records = xagb_bitmap_count_set_regions(freesp_blocks);
+
+ /* Compute how many blocks we'll need for all the rmaps. */
+ error = xfs_btree_bload_compute_geometry(rmap_cur,
+ &rr->new_btree.bload, rr->nr_records + freesp_records);
+ if (error)
+ return error;
+
+ /*
+ * We're done when we don't need more blocks, i.e. the block count we
+ * saved on entry already covers the recomputed geometry.
+ */
+ *done = nr_blocks >= rr->new_btree.bload.nr_blocks;
+ return 0;
+}
+
+/*
+ * Reserve disk space for the new rmap btree, repeating the reservation until
+ * it is large enough to hold both the rmaps collected so far and the OWN_AG
+ * rmaps that describe the free space metadata, then stash those OWN_AG rmaps.
+ * This is section (II) of the repair strategy described earlier in this file.
+ *
+ * Returns 0 on success or an error code from one of the helpers.  The
+ * temporary free space bitmap is always destroyed before returning.
+ */
+STATIC int
+xrep_rmap_reserve_space(
+	struct xrep_rmap	*rr,
+	struct xfs_btree_cur	*rmap_cur)
+{
+	struct xagb_bitmap	ag_space;	/* AGBIT */
+	uint64_t		nr_reserved = 0;
+	bool			finished = false;
+	int			ret;
+
+	/* Size the new btree for the rmap records we already know about. */
+	ret = xfs_btree_bload_compute_geometry(rmap_cur, &rr->new_btree.bload,
+			rr->nr_records);
+	if (ret)
+		return ret;
+
+	/* This is the final opportunity to bail out before fixes are made. */
+	if (xchk_should_terminate(rr->sc, &ret))
+		return ret;
+
+	xagb_bitmap_init(&ag_space);
+
+	/*
+	 * Each pass reserves more blocks and then recomputes how many blocks
+	 * are required for the previously observed records plus the records
+	 * that the free space metadata will need.  Stop once a pass reports
+	 * that no further blocks are required.
+	 */
+	while (!finished) {
+		ret = xrep_rmap_try_reserve(rr, rmap_cur, &ag_space,
+				&nr_reserved, &finished);
+		if (ret)
+			goto out_bitmap;
+	}
+
+	/* Turn every extent in the free space bitmap into an OWN_AG rmap. */
+	xrep_ag_btcur_init(rr->sc, &rr->sc->sa);
+	ret = xrep_rmap_stash_bitmap(rr, &ag_space, &XFS_RMAP_OINFO_AG);
+	xchk_ag_btcur_free(&rr->sc->sa);
+
+out_bitmap:
+	xagb_bitmap_destroy(&ag_space);
+	return ret;
+}
+
+/* Section (III): Building the new rmap btree. */
+
+/*
+ * Log the new btree block count in the AGF and refresh the incore per-AG
+ * state from the updated header.  Returns the result of xrep_reinit_pagf().
+ */
+STATIC int
+xrep_rmap_reset_counters(
+	struct xrep_rmap	*rr)
+{
+	struct xfs_scrub	*sc = rr->sc;
+	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
+	xfs_agblock_t		nr_rmapbt;
+
+	/*
+	 * agf_btreeblks covers the free space btrees and the rmap btree, so
+	 * it has to be recomputed now that the rmapbt has a new shape.  One
+	 * block is subtracted from the staged tree's block count; presumably
+	 * this excludes the root block -- TODO confirm.
+	 */
+	nr_rmapbt = rr->new_btree.afake.af_blocks - 1;
+	agf->agf_btreeblks = cpu_to_be32(rr->freesp_btblocks + nr_rmapbt);
+	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS);
+
+	/*
+	 * Once the new btree is committed, reaping the old btree blocks can
+	 * race with the AIL checkpointing those same old blocks.  Should the
+	 * new tree be shorter than the old one, the rmapbt write verifier
+	 * would fail and the AIL would shut down the filesystem.
+	 *
+	 * Guard against that by stashing the old incore btree height as the
+	 * alternate height before the perag info is reloaded from the AGF.
+	 */
+	sc->sa.pag->pagf_repair_rmap_level = sc->sa.pag->pagf_rmap_level;
+
+	/* Reload the incore state from the values logged above. */
+	return xrep_reinit_pagf(sc);
+}
+
+/*
+ * Bulk loader callback: copy @nr_wanted records from the in-memory rmap btree
+ * into @block, filling slots starting at @idx.
+ *
+ * Returns the number of records loaded, an error code from the btree helpers,
+ * or -EFSCORRUPTED if the in-memory btree runs out of records early.
+ */
+STATIC int
+xrep_rmap_get_records(
+	struct xfs_btree_cur	*cur,
+	unsigned int		idx,
+	struct xfs_btree_block	*block,
+	unsigned int		nr_wanted,
+	void			*priv)
+{
+	struct xrep_rmap	*rr = priv;
+	unsigned int		nr_loaded = 0;
+
+	while (nr_loaded < nr_wanted) {
+		union xfs_btree_rec	*slot;
+		int			found = 0;
+		int			ret;
+
+		/* Advance the in-memory cursor to the next stashed record. */
+		ret = xfs_btree_increment(rr->mcur, 0, &found);
+		if (ret)
+			return ret;
+		if (!found)
+			return -EFSCORRUPTED;
+
+		/* Read that record into the staging cursor's record buffer. */
+		ret = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &found);
+		if (ret)
+			return ret;
+		if (!found)
+			return -EFSCORRUPTED;
+
+		/* Format it into the next free slot of the new block. */
+		slot = xfs_btree_rec_addr(cur, idx + nr_loaded, block);
+		cur->bc_ops->init_rec_from_cur(cur, slot);
+		nr_loaded++;
+	}
+
+	return nr_loaded;
+}
+
+/*
+ * Bulk loader callback: hand one of the blocks reserved for the new btree to
+ * the loader by way of xrep_newbt_claim_block().
+ */
+STATIC int
+xrep_rmap_claim_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	void			*priv)
+{
+	struct xrep_rmap	*repair = priv;
+
+	return xrep_newbt_claim_block(cur, &repair->new_btree, ptr);
+}
+
+/*
+ * Allocation hook used when reserving blocks for the new rmap btree: repair
+ * the AG freelist without generating rmap updates, then allocate space near
+ * @alloc_hint.  Returns 0 or an error code from either step.
+ */
+STATIC int
+xrep_rmap_alloc_vextent(
+	struct xfs_scrub	*sc,
+	struct xfs_alloc_arg	*args,
+	xfs_fsblock_t		alloc_hint)
+{
+	int			ret;
+
+	/*
+	 * The OWN_AG records are computed iteratively /after/ blocks have
+	 * been allocated for the records we already know about, so the
+	 * allocation itself must not trigger an rmap update.  Fixing the
+	 * freelist with NORMAP set likewise avoids creating rmaps for any
+	 * new AGFL blocks.
+	 */
+	ret = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NORMAP);
+
+	/*
+	 * xrep_fix_freelist dirties the transaction if it moved blocks out of
+	 * the free space btrees, or if it removed blocks from the AGFL and
+	 * queued an EFI to free them.  The EFI case matters here.
+	 *
+	 * Freeing the old btree's blocks later requires comparing gaps in the
+	 * new recordset against the block usage of every OWN_AG owner, so no
+	 * EFIs for former AGFL blocks may remain attached to the repair
+	 * transaction when the new btree is committed.
+	 *
+	 * xrep_newbt_alloc_blocks takes care of this by calling
+	 * xrep_defer_finish to commit whatever fix_freelist may have added to
+	 * the transaction.
+	 */
+	if (!ret)
+		ret = xfs_alloc_vextent_near_bno(args, alloc_hint);
+	return ret;
+}
+
+
+/*
+ * Walk @cur from the left edge of the btree to the end and store the number
+ * of records visited in @nr.  Returns 0 or an error code from the btree
+ * helpers; on error, @nr holds the number of records counted so far.
+ */
+STATIC int
+xrep_rmap_count_records(
+	struct xfs_btree_cur	*cur,
+	unsigned long long	*nr)
+{
+	int			has_more = 1;
+	int			ret;
+
+	*nr = 0;
+
+	ret = xfs_btree_goto_left_edge(cur);
+	if (ret)
+		return ret;
+
+	for (;;) {
+		ret = xfs_btree_increment(cur, 0, &has_more);
+		if (ret || !has_more)
+			break;
+		(*nr)++;
+	}
+
+	return ret;
+}
+
+/*
+ * Use the collected rmap information to stage a new rmap btree. If this is
+ * successful we'll return with the new btree root information logged to the
+ * repair transaction but not yet committed. This implements section (III)
+ * above.
+ *
+ * In order, this function: records the old rmapbt block count; sets up the
+ * new btree reservation and a staging cursor; reserves space and computes
+ * the OWN_AG rmaps; recounts the records in the in-memory btree; bulk loads
+ * them into the staged btree; installs the staged root in the AGF; aborts
+ * the inode scan to stop live updates; resets the AGF counters; and finally
+ * commits the block reservation and rolls the transaction.
+ *
+ * Returns 0 or an error code. Failures before the staged root is installed
+ * unwind through the err_* labels below, which fall through in order.
+ */
+STATIC int
+xrep_rmap_build_new_tree(
+ struct xrep_rmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
+ struct xfs_btree_cur *rmap_cur;
+ xfs_fsblock_t fsbno;
+ int error;
+
+ /*
+ * Preserve the old rmapbt block count so that we can adjust the
+ * per-AG rmapbt reservation after we commit the new btree root and
+ * want to dispose of the old btree blocks.
+ */
+ rr->old_rmapbt_fsbcount = be32_to_cpu(agf->agf_rmap_blocks);
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the AG header. The new blocks are accounted to the
+ * rmapbt per-AG reservation, which we will adjust further after
+ * committing the new btree.
+ */
+ fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, XFS_RMAP_BLOCK(sc->mp));
+ xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
+ fsbno, XFS_AG_RESV_RMAPBT);
+ rr->new_btree.bload.get_records = xrep_rmap_get_records;
+ rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
+ rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
+ rmap_cur = xfs_rmapbt_init_cursor(sc->mp, NULL, NULL, pag);
+ xfs_btree_stage_afakeroot(rmap_cur, &rr->new_btree.afake);
+
+ /*
+ * Initialize @rr->new_btree, reserve space for the new rmapbt,
+ * and compute OWN_AG rmaps.
+ */
+ error = xrep_rmap_reserve_space(rr, rmap_cur);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Count the rmapbt records again, because the space reservation
+ * for the rmapbt itself probably added more records to the btree.
+ * The in-memory cursor created here is kept in rr->mcur because the
+ * ->get_records callback reads from it during the bulk load.
+ */
+ rr->mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL,
+ &rr->rmap_btree);
+
+ error = xrep_rmap_count_records(rr->mcur, &rr->nr_records);
+ if (error)
+ goto err_mcur;
+
+ /*
+ * Due to btree slack factors, it's possible for a new btree to be one
+ * level taller than the old btree. Update the incore btree height so
+ * that we don't trip the verifiers when writing the new btree blocks
+ * to disk.
+ */
+ pag->pagf_repair_rmap_level = rr->new_btree.bload.btree_height;
+
+ /*
+ * Move the cursor to the left edge of the tree so that the first
+ * increment in ->get_records positions us at the first record.
+ */
+ error = xfs_btree_goto_left_edge(rr->mcur);
+ if (error)
+ goto err_level;
+
+ /* Add all observed rmap records. */
+ error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
+ if (error)
+ goto err_level;
+
+ /*
+ * Install the new btree in the AG header. After this point the old
+ * btree is no longer accessible and the new tree is live.
+ */
+ xfs_rmapbt_commit_staged_btree(rmap_cur, sc->tp, sc->sa.agf_bp);
+ xfs_btree_del_cursor(rmap_cur, 0);
+ xfs_btree_del_cursor(rr->mcur, 0);
+ rr->mcur = NULL;
+
+ /*
+ * Now that we've written the new btree to disk, we don't need to keep
+ * updating the in-memory btree. Abort the scan to stop live updates.
+ */
+ xchk_iscan_abort(&rr->iscan);
+
+ /*
+ * The newly committed rmap recordset includes mappings for the blocks
+ * that we reserved to build the new btree. If there is excess space
+ * reservation to be freed, the corresponding rmap records must also be
+ * removed.
+ */
+ rr->new_btree.oinfo = XFS_RMAP_OINFO_AG;
+
+ /*
+ * Reset the AGF counters now that we've changed the btree shape.
+ * Both cursors were already deleted above, so a failure here only
+ * needs to cancel the reservation.
+ */
+ error = xrep_rmap_reset_counters(rr);
+ if (error)
+ goto err_newbt;
+
+ /* Dispose of any unused blocks and the accounting information. */
+ error = xrep_newbt_commit(&rr->new_btree);
+ if (error)
+ return error;
+
+ return xrep_roll_ag_trans(sc);
+
+err_level: /* turn the alternate btree height back off */
+ pag->pagf_repair_rmap_level = 0;
+err_mcur: /* drop the in-memory btree cursor */
+ xfs_btree_del_cursor(rr->mcur, error);
+err_cur: /* drop the staging cursor */
+ xfs_btree_del_cursor(rmap_cur, error);
+err_newbt: /* cancel the new btree reservation */
+ xrep_newbt_cancel(&rr->new_btree);
+ return error;
+}
+
+/* Section (IV): Reaping the old btree. */
+
+struct xrep_rmap_find_gaps { /* state for locating AG space that no rmap covers */
+ struct xagb_bitmap rmap_gaps; /* blocks between rmap records; bnobt free extents are cleared later */
+ xfs_agblock_t next_agbno; /* first block past the furthest rmap record seen so far */
+};
+
+/*
+ * Free space query callback: clear the extent described by @rec from the
+ * bitmap of rmap gaps passed in through @priv.
+ */
+STATIC int
+xrep_rmap_find_freesp(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_alloc_rec_incore *rec,
+	void				*priv)
+{
+	struct xrep_rmap_find_gaps	*gaps = priv;
+
+	return xagb_bitmap_clear(&gaps->rmap_gaps, rec->ar_startblock,
+			rec->ar_blockcount);
+}
+
+/*
+ * Rmap query callback: if @rec starts beyond the end of every record seen so
+ * far, mark the uncovered blocks in between as a gap, then push the
+ * high-water mark out to the end of @rec if that is further along.
+ */
+STATIC int
+xrep_rmap_find_gaps(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
+{
+	struct xrep_rmap_find_gaps	*gaps = priv;
+
+	if (rec->rm_startblock > gaps->next_agbno) {
+		int			ret;
+
+		ret = xagb_bitmap_set(&gaps->rmap_gaps, gaps->next_agbno,
+				rec->rm_startblock - gaps->next_agbno);
+		if (ret)
+			return ret;
+	}
+
+	/* Records may overlap, so never move the mark backwards. */
+	gaps->next_agbno = max_t(xfs_agblock_t, gaps->next_agbno,
+			rec->rm_startblock + rec->rm_blockcount);
+	return 0;
+}
+
+/*
+ * Reap the blocks of the old rmap btree.  With the rebuilt rmapbt in place,
+ * collect every gap between the new rmap records and then subtract every
+ * extent listed in the bnobt.  Whatever remains is neither mapped nor free,
+ * which means it belonged to the old rmapbt and can be released.
+ *
+ * Returns 0 or an error code; the gap bitmap is destroyed either way.
+ */
+STATIC int
+xrep_rmap_remove_old_tree(
+	struct xrep_rmap	*rr)
+{
+	struct xrep_rmap_find_gaps gaps = {
+		.next_agbno	= 0,
+	};
+	struct xfs_scrub	*sc = rr->sc;
+	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
+	struct xfs_perag	*pag = sc->sa.pag;
+	struct xfs_btree_cur	*mem_cur;
+	xfs_agblock_t		eoag;
+	int			ret;
+
+	xagb_bitmap_init(&gaps.rmap_gaps);
+
+	/* Walk the in-memory rmap records to find the unmapped ranges. */
+	mem_cur = xfs_rmapbt_mem_cursor(pag, NULL, &rr->rmap_btree);
+	ret = xfs_rmap_query_all(mem_cur, xrep_rmap_find_gaps, &gaps);
+	xfs_btree_del_cursor(mem_cur, ret);
+	if (ret)
+		goto out_bitmap;
+
+	/* The space between the last rmap and the end of the AG is a gap. */
+	eoag = be32_to_cpu(agf->agf_length);
+	if (gaps.next_agbno < eoag) {
+		ret = xagb_bitmap_set(&gaps.rmap_gaps, gaps.next_agbno,
+				eoag - gaps.next_agbno);
+		if (ret)
+			goto out_bitmap;
+	}
+
+	/* Remove everything that the existing bnobt says is free space. */
+	sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
+			pag);
+	ret = xfs_alloc_query_all(sc->sa.bno_cur, xrep_rmap_find_freesp,
+			&gaps);
+	xfs_btree_del_cursor(sc->sa.bno_cur, ret);
+	sc->sa.bno_cur = NULL;
+	if (ret)
+		goto out_bitmap;
+
+	/*
+	 * The remaining blocks look free to the new rmapbt but are unknown to
+	 * the bnobt, so they are the old rmapbt blocks.  Free them, crediting
+	 * the old rmapbt block usage back to the per-AG rmapbt reservation
+	 * (and not fdblocks, since the rmap btree lives in free space) so
+	 * that the reservation and free space accounting stay correct.
+	 */
+	ret = xrep_reap_agblocks(sc, &gaps.rmap_gaps,
+			&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT);
+	if (ret)
+		goto out_bitmap;
+
+	/*
+	 * All of the old rmapbt blocks are gone, so switch off the alternate
+	 * height mechanism and ask for the per-AG space reservation to be
+	 * reset.
+	 */
+	pag->pagf_repair_rmap_level = 0;
+	sc->flags |= XREP_RESET_PERAG_RESV;
+out_bitmap:
+	xagb_bitmap_destroy(&gaps.rmap_gaps);
+	return ret;
+}
+
+/*
+ * Decide whether a live rmap update for the owner described by @oi needs to
+ * be replayed into the in-memory rmap btree.
+ */
+static inline bool
+xrep_rmapbt_want_live_update(
+	struct xchk_iscan		*iscan,
+	const struct xfs_owner_info	*oi)
+{
+	/* An aborted scan no longer tracks anything. */
+	if (xchk_iscan_aborted(iscan))
+		return false;
+
+	/* Ignore updates to files that the scanner hasn't visited yet. */
+	if (!XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
+		return xchk_iscan_want_live_update(iscan, oi->oi_owner);
+
+	/*
+	 * Reverse mappings for all AG metadata other than OWN_AG metadata
+	 * were recorded before the AG header was unlocked for the inode scan.
+	 * The in-memory btree therefore already knows about the AG headers,
+	 * the two inode btrees, the CoW staging extents, and the refcount
+	 * btrees, and live updates to those must be applied to it.
+	 *
+	 * The free space btrees and the AGFL are not scanned until the AGF
+	 * has been re-locked and space is about to be reserved for the new
+	 * rmap btree, so live updates for OWN_AG metadata are not wanted.
+	 */
+	return oi->oi_owner != XFS_RMAP_OWN_AG;
+}
+
+/*
+ * Apply a rmapbt update from the regular filesystem into our shadow btree.
+ * We're running from the thread that owns the AGF buffer and is generating
+ * the update, so we must be careful about which parts of the struct xrep_rmap
+ * that we change.
+ *
+ * @nb is the notifier block embedded in the repair's rmap hook, @action is
+ * the rmap operation being performed, and @data points to the
+ * struct xfs_rmap_update_params describing the affected extent.
+ *
+ * Always returns NOTIFY_DONE.  If the shadow btree cannot be updated, the
+ * inode scan is aborted rather than reporting an error to the caller.
+ */
+static int
+xrep_rmapbt_live_update(
+	struct notifier_block		*nb,
+	unsigned long			action,
+	void				*data)
+{
+	struct xfs_rmap_update_params	*p = data;
+	struct xrep_rmap		*rr;
+	struct xfs_mount		*mp;
+	struct xfs_btree_cur		*mcur;
+	struct xfs_trans		*tp;
+	void				*txcookie;
+	int				error;
+
+	rr = container_of(nb, struct xrep_rmap, rhook.rmap_hook.nb);
+	mp = rr->sc->mp;
+
+	if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
+		goto out;
+
+	trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, action, p);
+
+	/*
+	 * rr->lock has not been taken yet, so a failure here must skip the
+	 * unlock and go straight to aborting the scan.
+	 */
+	error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
+	if (error)
+		goto out_abort;
+
+	/* Replay the update into the in-memory btree under rr->lock. */
+	mutex_lock(&rr->lock);
+	mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, &rr->rmap_btree);
+	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
+			p->blockcount, &p->oinfo, p->unwritten);
+	xfs_btree_del_cursor(mcur, error);
+	if (error)
+		goto out_cancel;
+
+	error = xfbtree_trans_commit(&rr->rmap_btree, tp);
+	if (error)
+		goto out_cancel;
+
+	xrep_trans_cancel_hook_dummy(&txcookie, tp);
+	mutex_unlock(&rr->lock);
+	return NOTIFY_DONE;
+
+out_cancel:
+	xfbtree_trans_cancel(&rr->rmap_btree, tp);
+	xrep_trans_cancel_hook_dummy(&txcookie, tp);
+	/* Only paths that took rr->lock may release it. */
+	mutex_unlock(&rr->lock);
+out_abort:
+	xchk_iscan_abort(&rr->iscan);
+out:
+	return NOTIFY_DONE;
+}
+
+/*
+ * Prepare everything the filesystem scan needs: the lock, the in-memory rmap
+ * btree, the inode scan, and the live rmap update hook.  Returns 0 or an
+ * error code; on failure, everything set up so far is torn down again.
+ */
+STATIC int
+xrep_rmap_setup_scan(
+	struct xrep_rmap	*rr)
+{
+	struct xfs_scrub	*sc = rr->sc;
+	int			ret;
+
+	mutex_init(&rr->lock);
+
+	/* Create the in-memory rmap btree that will shadow this AG. */
+	ret = xfs_rmapbt_mem_init(sc->mp, &rr->rmap_btree, sc->xmbtp,
+			sc->sa.pag->pag_agno);
+	if (ret) {
+		mutex_destroy(&rr->lock);
+		return ret;
+	}
+
+	/* Retry iget every tenth of a second for up to 30 seconds. */
+	xchk_iscan_start(sc, 30000, 100, &rr->iscan);
+
+	/*
+	 * The AGF buffer is dropped while all the inodes are scanned, so the
+	 * in-memory btree must be kept in sync with live changes to the
+	 * filesystem.  Hooking into live rmap operations does that, and it
+	 * prevents a stale btree from being installed.
+	 */
+	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
+	xfs_rmap_hook_setup(&rr->rhook, xrep_rmapbt_live_update);
+	ret = xfs_rmap_hook_add(sc->sa.pag, &rr->rhook);
+	if (!ret)
+		return 0;
+
+	/* Hook installation failed; undo the setup in reverse order. */
+	xchk_iscan_teardown(&rr->iscan);
+	xfbtree_destroy(&rr->rmap_btree);
+	mutex_destroy(&rr->lock);
+	return ret;
+}
+
+/*
+ * Undo xrep_rmap_setup_scan(): abort the inode scan, remove the live update
+ * hook, and then release the scan, the in-memory btree, and the lock.
+ */
+STATIC void
+xrep_rmap_teardown(
+	struct xrep_rmap	*rr)
+{
+	struct xfs_perag	*pag = rr->sc->sa.pag;
+
+	xchk_iscan_abort(&rr->iscan);
+	xfs_rmap_hook_del(pag, &rr->rhook);
+	xchk_iscan_teardown(&rr->iscan);
+	xfbtree_destroy(&rr->rmap_btree);
+	mutex_destroy(&rr->lock);
+}
+
+/*
+ * Repair the rmap btree for some AG.  The repair context is taken from
+ * sc->buf.  Returns 0 or the error code of the first step that failed; the
+ * scan components are torn down in either case once they have been set up.
+ */
+int
+xrep_rmapbt(
+	struct xfs_scrub	*sc)
+{
+	struct xrep_rmap	*rr = sc->buf;
+	int			ret;
+
+	ret = xrep_rmap_setup_scan(rr);
+	if (ret)
+		return ret;
+
+	/*
+	 * Collect rmaps for everything in this AG that isn't space metadata.
+	 * These rmaps won't change even as we try to allocate blocks.
+	 */
+	ret = xrep_rmap_find_rmaps(rr);
+
+	/* Rebuild the rmap information. */
+	if (!ret)
+		ret = xrep_rmap_build_new_tree(rr);
+
+	/* Kill the old tree. */
+	if (!ret)
+		ret = xrep_rmap_remove_old_tree(rr);
+
+	xrep_rmap_teardown(rr);
+	return ret;
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 6828e72824fb..20fac9723c08 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -16,6 +16,7 @@
#include "xfs_qm.h"
#include "xfs_scrub.h"
#include "xfs_buf_mem.h"
+#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -164,6 +165,9 @@ xchk_fsgates_disable(
if (sc->flags & XCHK_FSGATES_DIRENTS)
xfs_dir_hook_disable();
+ if (sc->flags & XCHK_FSGATES_RMAP)
+ xfs_rmap_hook_disable();
+
sc->flags &= ~XCHK_FSGATES_ALL;
}
@@ -278,7 +282,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_ag_rmapbt,
.scrub = xchk_rmapbt,
.has = xfs_has_rmapbt,
- .repair = xrep_notsupported,
+ .repair = xrep_rmapbt,
},
[XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
.type = ST_PERAG,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 1247284c17a0..9ad65b604fe1 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -126,6 +126,7 @@ struct xfs_scrub {
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */
+#define XCHK_FSGATES_RMAP (1U << 6) /* rmapbt live update enabled */
#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
@@ -137,7 +138,8 @@ struct xfs_scrub {
*/
#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \
XCHK_FSGATES_QUOTA | \
- XCHK_FSGATES_DIRENTS)
+ XCHK_FSGATES_DIRENTS | \
+ XCHK_FSGATES_RMAP)
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index dc8a331f4b02..3dd281d6d185 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -18,6 +18,7 @@
#include "xfs_quota_defs.h"
#include "xfs_da_format.h"
#include "xfs_dir2.h"
+#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b840f25c03d6..5b294be52c55 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -25,6 +25,7 @@ struct xchk_dqiter;
struct xchk_iscan;
struct xchk_nlink;
struct xchk_fscounters;
+struct xfs_rmap_update_params;
/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
@@ -112,9 +113,19 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XCHK_FSGATES_QUOTA, "fsgates_quota" }, \
{ XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \
+ { XCHK_FSGATES_RMAP, "fsgates_rmap" }, \
{ XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP);
+TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT);
+TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED);
+TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC);
+TRACE_DEFINE_ENUM(XFS_RMAP_FREE);
+
DECLARE_EVENT_CLASS(xchk_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
int error),
@@ -1595,7 +1606,6 @@ DEFINE_EVENT(xrep_rmap_class, name, \
uint64_t owner, uint64_t offset, unsigned int flags), \
TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
DEFINE_REPAIR_RMAP_EVENT(xrep_ibt_walk_rmap);
-DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_walk_rmap);
TRACE_EVENT(xrep_abt_found,
@@ -1713,6 +1723,38 @@ TRACE_EVENT(xrep_bmap_found,
__entry->state)
);
+TRACE_EVENT(xrep_rmap_found, /* logs the fields of one rmap record in an AG */
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ const struct xfs_rmap_irec *rec),
+ TP_ARGS(mp, agno, rec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = rec->rm_startblock; /* first block of the mapped extent */
+ __entry->len = rec->rm_blockcount; /* length of the extent in blocks */
+ __entry->owner = rec->rm_owner;
+ __entry->offset = rec->rm_offset;
+ __entry->flags = rec->rm_flags;
+ ),
+ TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
uint32_t magic, uint16_t level),
@@ -2195,6 +2237,42 @@ DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode);
+TRACE_EVENT(xrep_rmap_live_update, /* logs one live rmap update seen by the rmapbt repair hook */
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int op,
+ const struct xfs_rmap_update_params *p),
+ TP_ARGS(mp, agno, op, p),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(unsigned int, op) /* presumably one of the XFS_RMAP_* operation codes -- TODO confirm */
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->op = op;
+ __entry->agbno = p->startblock;
+ __entry->len = p->blockcount;
+ xfs_owner_info_unpack(&p->oinfo, &__entry->owner, /* fills in owner, offset and flags */
+ &__entry->offset, &__entry->flags);
+ if (p->unwritten) /* report the unwritten state as part of the flags */
+ __entry->flags |= XFS_RMAP_UNWRITTEN;
+ ),
+ TP_printk("dev %d:%d agno 0x%x op %d agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->op,
+ __entry->agbno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 90a77cd3ebad..5c6773628d69 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -50,7 +50,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "ibt2", xfsstats_offset(xs_fibt_2) },
{ "fibt2", xfsstats_offset(xs_rmap_2) },
{ "rmapbt", xfsstats_offset(xs_refcbt_2) },
- { "refcntbt", xfsstats_offset(xs_qm_dqreclaims)},
+ { "refcntbt", xfsstats_offset(xs_rmap_mem_2) },
+ { "rmapbt_mem", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */
{ "qm", xfsstats_offset(xs_xstrat_bytes)},
};
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 43ffba74f045..3b50419d8bb9 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -125,6 +125,7 @@ struct __xfsstats {
uint32_t xs_fibt_2[__XBTS_MAX];
uint32_t xs_rmap_2[__XBTS_MAX];
uint32_t xs_refcbt_2[__XBTS_MAX];
+ uint32_t xs_rmap_mem_2[__XBTS_MAX];
uint32_t xs_qm_dqreclaims;
uint32_t xs_qm_dqreclaim_misses;
uint32_t xs_qm_dquot_dups;