 Documentation/filesystems/xfs/xfs-online-fsck-design.rst |   5
 fs/xfs/Kconfig                                            |   8
 fs/xfs/Makefile                                           |   2
 fs/xfs/libxfs/xfs_ag.c                                    |   6
 fs/xfs/libxfs/xfs_ag.h                                    |   4
 fs/xfs/libxfs/xfs_btree.c                                 | 286
 fs/xfs/libxfs/xfs_btree.h                                 |   7
 fs/xfs/libxfs/xfs_btree_mem.c                             | 347
 fs/xfs/libxfs/xfs_btree_mem.h                             |  75
 fs/xfs/scrub/scrub.c                                      |   5
 fs/xfs/scrub/scrub.h                                      |   3
 fs/xfs/xfs_buf.c                                          | 214
 fs/xfs/xfs_buf.h                                          |  17
 fs/xfs/xfs_buf_mem.c                                      | 270
 fs/xfs/xfs_buf_mem.h                                      |  34
 fs/xfs/xfs_health.c                                       |   3
 fs/xfs/xfs_mount.h                                        |   3
 fs/xfs/xfs_trace.c                                        |   2
 fs/xfs/xfs_trace.h                                        | 167
 fs/xfs/xfs_trans.h                                        |   1
 fs/xfs/xfs_trans_buf.c                                    |  42
 21 files changed, 1363 insertions(+), 138 deletions(-)
diff --git a/Documentation/filesystems/xfs/xfs-online-fsck-design.rst b/Documentation/filesystems/xfs/xfs-online-fsck-design.rst
index d03480266e9b..6333697ba3e8 100644
--- a/Documentation/filesystems/xfs/xfs-online-fsck-design.rst
+++ b/Documentation/filesystems/xfs/xfs-online-fsck-design.rst
@@ -2270,13 +2270,12 @@ follows:
pointing to the xfile.
3. Pass the buffer cache target, buffer ops, and other information to
- ``xfbtree_create`` to write an initial tree header and root block to the
- xfile.
+ ``xfbtree_init`` to initialize the passed in ``struct xfbtree`` and write an
+ initial root block to the xfile.
Each btree type should define a wrapper that passes necessary arguments to
the creation function.
For example, rmap btrees define ``xfs_rmapbt_mem_create`` to take care of
all the necessary details for callers.
- A ``struct xfbtree`` object will be returned.
4. Pass the xfbtree object to the btree cursor creation function for the
btree type.
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index fa7eb3e2a248..d41edd30388b 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -128,6 +128,12 @@ config XFS_LIVE_HOOKS
bool
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
+config XFS_MEMORY_BUFS
+ bool
+
+config XFS_BTREE_IN_MEM
+ bool
+
config XFS_ONLINE_SCRUB
bool "XFS online metadata check support"
default n
@@ -135,6 +141,7 @@ config XFS_ONLINE_SCRUB
depends on TMPFS && SHMEM
select XFS_LIVE_HOOKS
select XFS_DRAIN_INTENTS
+ select XFS_MEMORY_BUFS
help
If you say Y here you will be able to check metadata on a
mounted XFS filesystem. This feature is intended to reduce
@@ -169,6 +176,7 @@ config XFS_ONLINE_REPAIR
bool "XFS online metadata repair support"
default n
depends on XFS_FS && XFS_ONLINE_SCRUB
+ select XFS_BTREE_IN_MEM
help
If you say Y here you will be able to repair metadata on a
mounted XFS filesystem. This feature is intended to reduce
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ba8608f469ac..9a6574da8de5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -137,6 +137,8 @@ endif
xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o
xfs-$(CONFIG_XFS_LIVE_HOOKS) += xfs_hooks.o
+xfs-$(CONFIG_XFS_MEMORY_BUFS) += xfs_buf_mem.o
+xfs-$(CONFIG_XFS_BTREE_IN_MEM) += libxfs/xfs_btree_mem.o
# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index e598e14320be..32d80a76440c 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -264,7 +264,7 @@ xfs_free_perag(
xfs_defer_drain_free(&pag->pag_intents_drain);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
- xfs_buf_hash_destroy(pag);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
/* drop the mount's active reference */
xfs_perag_rele(pag);
@@ -352,7 +352,7 @@ xfs_free_unused_perag_range(
spin_unlock(&mp->m_perag_lock);
if (!pag)
break;
- xfs_buf_hash_destroy(pag);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
xfs_defer_drain_free(&pag->pag_intents_drain);
kfree(pag);
}
@@ -419,7 +419,7 @@ xfs_initialize_perag(
pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */
- error = xfs_buf_hash_init(pag);
+ error = xfs_buf_cache_init(&pag->pag_bcache);
if (error)
goto out_remove_pag;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 19eddba09894..29bfa6273dec 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -106,9 +106,7 @@ struct xfs_perag {
int pag_ici_reclaimable; /* reclaimable inodes */
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
- /* buffer cache index */
- spinlock_t pag_buf_lock; /* lock for pag_buf_hash */
- struct rhashtable pag_buf_hash;
+ struct xfs_buf_cache pag_bcache;
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index ecdd5ea27a03..d29547572a68 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -28,6 +28,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
/*
* Btree magic numbers.
@@ -76,6 +78,25 @@ xfs_btree_check_fsblock_siblings(
}
static inline xfs_failaddr_t
+xfs_btree_check_memblock_siblings(
+ struct xfs_buftarg *btp,
+ xfbno_t bno,
+ __be64 dsibling)
+{
+ xfbno_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
+ return NULL;
+
+ sibling = be64_to_cpu(dsibling);
+ if (sibling == bno)
+ return __this_address;
+ if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling)))
+ return __this_address;
+ return NULL;
+}
+
+static inline xfs_failaddr_t
xfs_btree_check_agblock_siblings(
struct xfs_perag *pag,
xfs_agblock_t agbno,
@@ -165,6 +186,34 @@ __xfs_btree_check_fsblock(
}
/*
+ * Check an in-memory btree block header. Return the address of the failing
+ * check, or NULL if everything is ok.
+ */
+static xfs_failaddr_t
+__xfs_btree_check_memblock(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
+ xfs_failaddr_t fa;
+ xfbno_t bno;
+
+ fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
+ if (fa)
+ return fa;
+
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ return fa;
+}
+
+/*
* Check a short btree block header. Return the address of the failing check,
* or NULL if everything is ok.
*/
@@ -216,9 +265,17 @@ __xfs_btree_check_block(
int level,
struct xfs_buf *bp)
{
- if (cur->bc_ops->type == XFS_BTREE_TYPE_AG)
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return __xfs_btree_check_memblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_AG:
return __xfs_btree_check_agblock(cur, block, level, bp);
- return __xfs_btree_check_fsblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_INODE:
+ return __xfs_btree_check_fsblock(cur, block, level, bp);
+ default:
+ ASSERT(0);
+ return __this_address;
+ }
}
static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur)
@@ -262,14 +319,22 @@ __xfs_btree_check_ptr(
if (level <= 0)
return -EFSCORRUPTED;
- if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ if (!xfbtree_verify_bno(cur->bc_mem.xfbtree,
+ be64_to_cpu((&ptr->l)[index])))
+ return -EFSCORRUPTED;
+ break;
+ case XFS_BTREE_TYPE_INODE:
if (!xfs_verify_fsbno(cur->bc_mp,
be64_to_cpu((&ptr->l)[index])))
return -EFSCORRUPTED;
- } else {
+ break;
+ case XFS_BTREE_TYPE_AG:
if (!xfs_verify_agbno(cur->bc_ag.pag,
be32_to_cpu((&ptr->s)[index])))
return -EFSCORRUPTED;
+ break;
}
return 0;
@@ -290,17 +355,26 @@ xfs_btree_check_ptr(
error = __xfs_btree_check_ptr(cur, ptr, index, level);
if (error) {
- if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ xfs_err(cur->bc_mp,
+"In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS.",
+ cur->bc_ops->name, cur->bc_flags, level, index,
+ __this_address);
+ break;
+ case XFS_BTREE_TYPE_INODE:
xfs_err(cur->bc_mp,
"Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d.",
cur->bc_ino.ip->i_ino,
cur->bc_ino.whichfork, cur->bc_ops->name,
level, index);
- } else {
+ break;
+ case XFS_BTREE_TYPE_AG:
xfs_err(cur->bc_mp,
"AG %u: Corrupt %sbt pointer at level %d index %d.",
cur->bc_ag.pag->pag_agno, cur->bc_ops->name,
level, index);
+ break;
}
xfs_btree_mark_sick(cur);
}
@@ -457,11 +531,35 @@ xfs_btree_del_cursor(
case XFS_BTREE_TYPE_INODE:
/* nothing to do */
break;
+ case XFS_BTREE_TYPE_MEM:
+ if (cur->bc_mem.pag)
+ xfs_perag_put(cur->bc_mem.pag);
+ break;
}
kmem_cache_free(cur->bc_cache, cur);
}
+/* Return the buffer target for this btree's buffer. */
+static inline struct xfs_buftarg *
+xfs_btree_buftarg(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return cur->bc_mem.xfbtree->target;
+ return cur->bc_mp->m_ddev_targp;
+}
+
+/* Return the block size (in units of 512b sectors) for this btree. */
+static inline unsigned int
+xfs_btree_bbsize(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return XFBNO_BBSIZE;
+ return cur->bc_mp->m_bsize;
+}
+
/*
* Duplicate the btree cursor.
* Allocate a new one, copy the record, re-get the buffers.
@@ -505,10 +603,11 @@ xfs_btree_dup_cursor(
new->bc_levels[i].ra = cur->bc_levels[i].ra;
bp = cur->bc_levels[i].bp;
if (bp) {
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- xfs_buf_daddr(bp), mp->m_bsize,
- 0, &bp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, tp,
+ xfs_btree_buftarg(cur),
+ xfs_buf_daddr(bp),
+ xfs_btree_bbsize(cur), 0, &bp,
+ cur->bc_ops->buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(new);
if (error) {
@@ -886,6 +985,32 @@ xfs_btree_readahead_fsblock(
}
STATIC int
+xfs_btree_readahead_memblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
+{
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
+ xfbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ int rval = 0;
+
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
+ xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
+ xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
+ rval++;
+ }
+
+ return rval;
+}
+
+STATIC int
xfs_btree_readahead_agblock(
struct xfs_btree_cur *cur,
int lr,
@@ -939,9 +1064,17 @@ xfs_btree_readahead(
cur->bc_levels[lev].ra |= lr;
block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
- if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
+ return xfs_btree_readahead_agblock(cur, lr, block);
+ case XFS_BTREE_TYPE_INODE:
return xfs_btree_readahead_fsblock(cur, lr, block);
- return xfs_btree_readahead_agblock(cur, lr, block);
+ case XFS_BTREE_TYPE_MEM:
+ return xfs_btree_readahead_memblock(cur, lr, block);
+ default:
+ ASSERT(0);
+ return 0;
+ }
}
STATIC int
@@ -950,23 +1083,24 @@ xfs_btree_ptr_to_daddr(
const union xfs_btree_ptr *ptr,
xfs_daddr_t *daddr)
{
- xfs_fsblock_t fsbno;
- xfs_agblock_t agbno;
int error;
error = xfs_btree_check_ptr(cur, ptr, 0, 1);
if (error)
return error;
- if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
- fsbno = be64_to_cpu(ptr->l);
- *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
- } else {
- agbno = be32_to_cpu(ptr->s);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno);
+ be32_to_cpu(ptr->s));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ *daddr = xfbno_to_daddr(be64_to_cpu(ptr->l));
+ break;
}
-
return 0;
}
@@ -986,8 +1120,9 @@ xfs_btree_readahead_ptr(
if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
return;
- xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
- cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
+ xfs_btree_bbsize(cur) * count,
+ cur->bc_ops->buf_ops);
}
/*
@@ -1043,6 +1178,17 @@ xfs_btree_set_ptr_null(
ptr->s = cpu_to_be32(NULLAGBLOCK);
}
+static inline bool
+xfs_btree_ptrs_equal(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr1,
+ union xfs_btree_ptr *ptr2)
+{
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ return ptr1->l == ptr2->l;
+ return ptr1->s == ptr2->s;
+}
+
/*
* Get/set/init sibling pointers
*/
@@ -1161,9 +1307,17 @@ static inline __u64
xfs_btree_owner(
struct xfs_btree_cur *cur)
{
- if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return cur->bc_mem.xfbtree->owner;
+ case XFS_BTREE_TYPE_INODE:
return cur->bc_ino.ip->i_ino;
- return cur->bc_ag.pag->pag_agno;
+ case XFS_BTREE_TYPE_AG:
+ return cur->bc_ag.pag->pag_agno;
+ default:
+ ASSERT(0);
+ return 0;
+ }
}
void
@@ -1207,12 +1361,18 @@ xfs_btree_buf_to_ptr(
struct xfs_buf *bp,
union xfs_btree_ptr *ptr)
{
- if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
- ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
- xfs_buf_daddr(bp)));
- else {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp)));
+ break;
}
}
@@ -1231,15 +1391,14 @@ xfs_btree_get_buf_block(
struct xfs_btree_block **block,
struct xfs_buf **bpp)
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_daddr_t d;
- int error;
+ xfs_daddr_t d;
+ int error;
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
- 0, bpp);
+ error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), 0, bpp);
if (error)
return error;
@@ -1270,9 +1429,9 @@ xfs_btree_read_buf_block(
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags, bpp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), flags, bpp,
+ cur->bc_ops->buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(cur);
if (error)
@@ -4365,7 +4524,7 @@ xfs_btree_visit_block(
{
struct xfs_btree_block *block;
struct xfs_buf *bp;
- union xfs_btree_ptr rptr;
+ union xfs_btree_ptr rptr, bufptr;
int error;
/* do right sibling readahead */
@@ -4388,19 +4547,12 @@ xfs_btree_visit_block(
* return the same block without checking if the right sibling points
* back to us and creates a cyclic reference in the btree.
*/
- if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
- if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
- xfs_buf_daddr(bp))) {
- xfs_btree_mark_sick(cur);
- return -EFSCORRUPTED;
- }
- } else {
- if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
- xfs_buf_daddr(bp))) {
- xfs_btree_mark_sick(cur);
- return -EFSCORRUPTED;
- }
+ xfs_btree_buf_to_ptr(cur, bp, &bufptr);
+ if (xfs_btree_ptrs_equal(cur, &rptr, &bufptr)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
}
+
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@@ -4578,6 +4730,8 @@ xfs_btree_fsblock_verify(
xfs_fsblock_t fsb;
xfs_failaddr_t fa;
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
+
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
@@ -4592,6 +4746,36 @@ xfs_btree_fsblock_verify(
return fa;
}
+/* Verify an in-memory btree block. */
+xfs_failaddr_t
+xfs_btree_memblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buftarg *btp = bp->b_target;
+ xfs_failaddr_t fa;
+ xfbno_t bno;
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return __this_address;
+
+ /* sibling pointer verification */
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (fa)
+ return fa;
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ if (fa)
+ return fa;
+
+ return NULL;
+}
/**
* xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block
@@ -4633,6 +4817,8 @@ xfs_btree_agblock_verify(
xfs_agblock_t agbno;
xfs_failaddr_t fa;
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
+
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index bacd67cc8ced..f93374278aa1 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -112,6 +112,7 @@ static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y)
enum xfs_btree_type {
XFS_BTREE_TYPE_AG,
XFS_BTREE_TYPE_INODE,
+ XFS_BTREE_TYPE_MEM,
};
struct xfs_btree_ops {
@@ -281,6 +282,10 @@ struct xfs_btree_cur
struct xfs_buf *agbp;
struct xbtree_afakeroot *afake; /* for staging cursor */
} bc_ag;
+ struct {
+ struct xfbtree *xfbtree;
+ struct xfs_perag *pag;
+ } bc_mem;
};
/* per-format private data */
@@ -455,6 +460,8 @@ xfs_failaddr_t xfs_btree_fsblock_v5hdr_verify(struct xfs_buf *bp,
uint64_t owner);
xfs_failaddr_t xfs_btree_fsblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
+xfs_failaddr_t xfs_btree_memblock_verify(struct xfs_buf *bp,
+ unsigned int max_recs);
unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits,
unsigned long long records);
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
new file mode 100644
index 000000000000..036061fe32cc
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trans.h"
+#include "xfs_btree.h"
+#include "xfs_error.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_ag.h"
+#include "xfs_buf_item.h"
+#include "xfs_trace.h"
+
+/* Set the root of an in-memory btree. */
+void
+xfbtree_set_root(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int inc)
+{
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ cur->bc_mem.xfbtree->root = *ptr;
+ cur->bc_mem.xfbtree->nlevels += inc;
+}
+
+/* Initialize a pointer from the in-memory btree header. */
+void
+xfbtree_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ *ptr = cur->bc_mem.xfbtree->root;
+}
+
+/* Duplicate an in-memory btree cursor. */
+struct xfs_btree_cur *
+xfbtree_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_btree_cur *ncur;
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops,
+ cur->bc_maxlevels, cur->bc_cache);
+ ncur->bc_flags = cur->bc_flags;
+ ncur->bc_nlevels = cur->bc_nlevels;
+ ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
+
+ if (cur->bc_mem.pag)
+ ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
+
+ return ncur;
+}
+
+/* Close the btree xfile and release all resources. */
+void
+xfbtree_destroy(
+ struct xfbtree *xfbt)
+{
+ xfs_buftarg_drain(xfbt->target);
+}
+
+/* Compute the number of bytes available for records. */
+static inline unsigned int
+xfbtree_rec_bytes(
+ struct xfs_mount *mp,
+ const struct xfs_btree_ops *ops)
+{
+ return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
+}
+
+/* Initialize an empty leaf block as the btree root. */
+STATIC int
+xfbtree_init_leaf_block(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ const struct xfs_btree_ops *ops)
+{
+ struct xfs_buf *bp;
+ xfbno_t bno = xfbt->highest_bno++;
+ int error;
+
+ error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE,
+ &bp);
+ if (error)
+ return error;
+
+ trace_xfbtree_create_root_buf(xfbt, bp);
+
+ bp->b_ops = ops->buf_ops;
+ xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner);
+ xfs_buf_relse(bp);
+
+ xfbt->root.l = cpu_to_be64(bno);
+ return 0;
+}
+
+/*
+ * Create an in-memory btree root that can be used with the given xmbuf.
+ * Callers must set xfbt->owner.
+ */
+int
+xfbtree_init(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ struct xfs_buftarg *btp,
+ const struct xfs_btree_ops *ops)
+{
+ unsigned int blocklen = xfbtree_rec_bytes(mp, ops);
+ unsigned int keyptr_len;
+ int error;
+
+ /* Requires a long-format CRC-format btree */
+ if (!xfs_has_crc(mp)) {
+ ASSERT(xfs_has_crc(mp));
+ return -EINVAL;
+ }
+ if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
+ ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN);
+ return -EINVAL;
+ }
+
+ memset(xfbt, 0, sizeof(*xfbt));
+ xfbt->target = btp;
+
+ /* Set up min/maxrecs for this btree. */
+ keyptr_len = ops->key_len + sizeof(__be64);
+ xfbt->maxrecs[0] = blocklen / ops->rec_len;
+ xfbt->maxrecs[1] = blocklen / keyptr_len;
+ xfbt->minrecs[0] = xfbt->maxrecs[0] / 2;
+ xfbt->minrecs[1] = xfbt->maxrecs[1] / 2;
+ xfbt->highest_bno = 0;
+ xfbt->nlevels = 1;
+
+ /* Initialize the empty btree. */
+ error = xfbtree_init_leaf_block(mp, xfbt, ops);
+ if (error)
+ goto err_freesp;
+
+ trace_xfbtree_init(mp, xfbt, ops);
+
+ return 0;
+
+err_freesp:
+ xfs_buftarg_drain(xfbt->target);
+ return error;
+}
+
+/* Allocate a block to our in-memory btree. */
+int
+xfbtree_alloc_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+ xfbno_t bno = xfbt->highest_bno++;
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ trace_xfbtree_alloc_block(xfbt, cur, bno);
+
+ /* Fail if the block address exceeds the maximum for the buftarg. */
+ if (!xfbtree_verify_bno(xfbt, bno)) {
+ ASSERT(xfbtree_verify_bno(xfbt, bno));
+ *stat = 0;
+ return 0;
+ }
+
+ new->l = cpu_to_be64(bno);
+ *stat = 1;
+ return 0;
+}
+
+/* Free a block from our in-memory btree. */
+int
+xfbtree_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+ xfs_daddr_t daddr = xfs_buf_daddr(bp);
+ xfbno_t bno = xfs_daddr_to_xfbno(daddr);
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ trace_xfbtree_free_block(xfbt, cur, bno);
+
+ if (bno + 1 == xfbt->highest_bno)
+ xfbt->highest_bno--;
+
+ return 0;
+}
+
+/* Return the minimum number of records for a btree block. */
+int
+xfbtree_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+
+ return xfbt->minrecs[level != 0];
+}
+
+/* Return the maximum number of records for a btree block. */
+int
+xfbtree_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+
+ return xfbt->maxrecs[level != 0];
+}
+
+/* If this log item is a buffer item that came from the xfbtree, return it. */
+static inline struct xfs_buf *
+xfbtree_buf_match(
+ struct xfbtree *xfbt,
+ const struct xfs_log_item *lip)
+{
+ const struct xfs_buf_log_item *bli;
+ struct xfs_buf *bp;
+
+ if (lip->li_type != XFS_LI_BUF)
+ return NULL;
+
+ bli = container_of(lip, struct xfs_buf_log_item, bli_item);
+ bp = bli->bli_buf;
+ if (bp->b_target != xfbt->target)
+ return NULL;
+
+ return bp;
+}
+
+/*
+ * Commit changes to the incore btree immediately by writing all dirty xfbtree
+ * buffers to the backing xfile. This detaches all xfbtree buffers from the
+ * transaction, even on failure. The buffer locks are dropped between the
+ * delwri queue and submit, so the caller must synchronize btree access.
+ *
+ * Normally we'd let the buffers commit with the transaction and get written to
+ * the xfile via the log, but online repair stages ephemeral btrees in memory
+ * and uses the btree_staging functions to write new btrees to disk atomically.
+ * The in-memory btree (and its backing store) are discarded at the end of the
+ * repair phase, which means that xfbtree buffers cannot commit with the rest
+ * of a transaction.
+ *
+ * In other words, online repair only needs the transaction to collect buffer
+ * pointers and to avoid buffer deadlocks, not to guarantee consistency of
+ * updates.
+ */
+int
+xfbtree_trans_commit(
+ struct xfbtree *xfbt,
+ struct xfs_trans *tp)
+{
+ struct xfs_log_item *lip, *n;
+ bool tp_dirty = false;
+ int error = 0;
+
+ /*
+ * For each xfbtree buffer attached to the transaction, write the dirty
+ * buffers to the xfile and release them.
+ */
+ list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
+ struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
+
+ if (!bp) {
+ if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
+ tp_dirty |= true;
+ continue;
+ }
+
+ trace_xfbtree_trans_commit_buf(xfbt, bp);
+
+ xmbuf_trans_bdetach(tp, bp);
+
+ /*
+ * If the buffer fails verification, note the failure but
+ * continue walking the transaction items so that we remove all
+ * ephemeral btree buffers.
+ */
+ if (!error)
+ error = xmbuf_finalize(bp);
+
+ xfs_buf_relse(bp);
+ }
+
+ /*
+ * Reset the transaction's dirty flag to reflect the dirty state of the
+ * log items that are still attached.
+ */
+ tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
+ (tp_dirty ? XFS_TRANS_DIRTY : 0);
+
+ return error;
+}
+
+/*
+ * Cancel changes to the incore btree by detaching all the xfbtree buffers.
+ * Changes are not undone, so callers must not access the btree ever again.
+ */
+void
+xfbtree_trans_cancel(
+ struct xfbtree *xfbt,
+ struct xfs_trans *tp)
+{
+ struct xfs_log_item *lip, *n;
+ bool tp_dirty = false;
+
+ list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
+ struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
+
+ if (!bp) {
+ if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
+ tp_dirty |= true;
+ continue;
+ }
+
+ trace_xfbtree_trans_cancel_buf(xfbt, bp);
+
+ xmbuf_trans_bdetach(tp, bp);
+ xfs_buf_relse(bp);
+ }
+
+ /*
+ * Reset the transaction's dirty flag to reflect the dirty state of the
+ * log items that are still attached.
+ */
+ tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
+ (tp_dirty ? XFS_TRANS_DIRTY : 0);
+}
diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h
new file mode 100644
index 000000000000..1c3825786ec8
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_btree_mem.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#ifndef __XFS_BTREE_MEM_H__
+#define __XFS_BTREE_MEM_H__
+
+typedef uint64_t xfbno_t;
+
+#define XFBNO_BLOCKSIZE (XMBUF_BLOCKSIZE)
+#define XFBNO_BBSHIFT (XMBUF_BLOCKSHIFT - BBSHIFT)
+#define XFBNO_BBSIZE (XFBNO_BLOCKSIZE >> BBSHIFT)
+
+static inline xfs_daddr_t xfbno_to_daddr(xfbno_t blkno)
+{
+ return blkno << XFBNO_BBSHIFT;
+}
+
+static inline xfbno_t xfs_daddr_to_xfbno(xfs_daddr_t daddr)
+{
+ return daddr >> XFBNO_BBSHIFT;
+}
+
+struct xfbtree {
+ /* buffer cache target for this in-memory btree */
+ struct xfs_buftarg *target;
+
+ /* Highest block number that has been written to. */
+ xfbno_t highest_bno;
+
+ /* Owner of this btree. */
+ unsigned long long owner;
+
+ /* Btree header */
+ union xfs_btree_ptr root;
+ unsigned int nlevels;
+
+ /* Minimum and maximum records per block. */
+ unsigned int maxrecs[2];
+ unsigned int minrecs[2];
+};
+
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+static inline bool xfbtree_verify_bno(struct xfbtree *xfbt, xfbno_t bno)
+{
+ return xmbuf_verify_daddr(xfbt->target, xfbno_to_daddr(bno));
+}
+
+void xfbtree_set_root(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr, int inc);
+void xfbtree_init_ptr_from_cur(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr);
+struct xfs_btree_cur *xfbtree_dup_cursor(struct xfs_btree_cur *cur);
+
+int xfbtree_get_minrecs(struct xfs_btree_cur *cur, int level);
+int xfbtree_get_maxrecs(struct xfs_btree_cur *cur, int level);
+
+int xfbtree_alloc_block(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start, union xfs_btree_ptr *ptr,
+ int *stat);
+int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp);
+
+/* Callers must set xfbt->owner before calling this */
+int xfbtree_init(struct xfs_mount *mp, struct xfbtree *xfbt,
+ struct xfs_buftarg *btp, const struct xfs_btree_ops *ops);
+void xfbtree_destroy(struct xfbtree *xfbt);
+
+int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp);
+void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp);
+#else
+# define xfbtree_verify_bno(...) (false)
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
+
+#endif /* __XFS_BTREE_MEM_H__ */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index aeac9cae4ad4..6828e72824fb 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -15,6 +15,7 @@
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_scrub.h"
+#include "xfs_buf_mem.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -190,6 +191,10 @@ xchk_teardown(
sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
mnt_drop_write_file(sc->file);
}
+ if (sc->xmbtp) {
+ xmbuf_free(sc->xmbtp);
+ sc->xmbtp = NULL;
+ }
if (sc->xfile) {
xfile_destroy(sc->xfile);
sc->xfile = NULL;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index f99a3c21d02e..1247284c17a0 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -99,6 +99,9 @@ struct xfs_scrub {
/* xfile used by the scrubbers; freed at teardown. */
struct xfile *xfile;
+ /* buffer target for in-memory btrees; also freed at teardown. */
+ struct xfs_buftarg *xmbtp;
+
/* Lock flags for @ip. */
uint ilock_flags;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ff243e3176a5..7fc26e64368d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -21,6 +21,7 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_ag.h"
+#include "xfs_buf_mem.h"
struct kmem_cache *xfs_buf_cache;
@@ -318,7 +319,9 @@ xfs_buf_free(
ASSERT(list_empty(&bp->b_lru));
- if (bp->b_flags & _XBF_PAGES)
+ if (xfs_buftarg_is_mem(bp->b_target))
+ xmbuf_unmap_page(bp);
+ else if (bp->b_flags & _XBF_PAGES)
xfs_buf_free_pages(bp);
else if (bp->b_flags & _XBF_KMEM)
kfree(bp->b_addr);
@@ -510,18 +513,18 @@ static const struct rhashtable_params xfs_buf_hash_params = {
};
int
-xfs_buf_hash_init(
- struct xfs_perag *pag)
+xfs_buf_cache_init(
+ struct xfs_buf_cache *bch)
{
- spin_lock_init(&pag->pag_buf_lock);
- return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+ spin_lock_init(&bch->bc_lock);
+ return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
}
void
-xfs_buf_hash_destroy(
- struct xfs_perag *pag)
+xfs_buf_cache_destroy(
+ struct xfs_buf_cache *bch)
{
- rhashtable_destroy(&pag->pag_buf_hash);
+ rhashtable_destroy(&bch->bc_hash);
}
static int
@@ -584,7 +587,7 @@ xfs_buf_find_lock(
static inline int
xfs_buf_lookup(
- struct xfs_perag *pag,
+ struct xfs_buf_cache *bch,
struct xfs_buf_map *map,
xfs_buf_flags_t flags,
struct xfs_buf **bpp)
@@ -593,7 +596,7 @@ xfs_buf_lookup(
int error;
rcu_read_lock();
- bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);
+ bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
rcu_read_unlock();
return -ENOENT;
@@ -618,6 +621,7 @@ xfs_buf_lookup(
static int
xfs_buf_find_insert(
struct xfs_buftarg *btp,
+ struct xfs_buf_cache *bch,
struct xfs_perag *pag,
struct xfs_buf_map *cmap,
struct xfs_buf_map *map,
@@ -633,31 +637,33 @@ xfs_buf_find_insert(
if (error)
goto out_drop_pag;
- /*
- * For buffers that fit entirely within a single page, first attempt to
- * allocate the memory from the heap to minimise memory usage. If we
- * can't get heap memory for these small buffers, we fall back to using
- * the page allocator.
- */
- if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
- xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+ if (xfs_buftarg_is_mem(new_bp->b_target)) {
+ error = xmbuf_map_page(new_bp);
+ } else if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+ xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+ /*
+ * For buffers that fit entirely within a single page, first
+ * attempt to allocate the memory from the heap to minimise
+ * memory usage. If we can't get heap memory for these small
+ * buffers, we fall back to using the page allocator.
+ */
error = xfs_buf_alloc_pages(new_bp, flags);
- if (error)
- goto out_free_buf;
}
+ if (error)
+ goto out_free_buf;
- spin_lock(&pag->pag_buf_lock);
- bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
+ spin_lock(&bch->bc_lock);
+ bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
&new_bp->b_rhash_head, xfs_buf_hash_params);
if (IS_ERR(bp)) {
error = PTR_ERR(bp);
- spin_unlock(&pag->pag_buf_lock);
+ spin_unlock(&bch->bc_lock);
goto out_free_buf;
}
if (bp) {
/* found an existing buffer */
atomic_inc(&bp->b_hold);
- spin_unlock(&pag->pag_buf_lock);
+ spin_unlock(&bch->bc_lock);
error = xfs_buf_find_lock(bp, flags);
if (error)
xfs_buf_rele(bp);
@@ -668,17 +674,40 @@ xfs_buf_find_insert(
/* The new buffer keeps the perag reference until it is freed. */
new_bp->b_pag = pag;
- spin_unlock(&pag->pag_buf_lock);
+ spin_unlock(&bch->bc_lock);
*bpp = new_bp;
return 0;
out_free_buf:
xfs_buf_free(new_bp);
out_drop_pag:
- xfs_perag_put(pag);
+ if (pag)
+ xfs_perag_put(pag);
return error;
}
+static inline struct xfs_perag *
+xfs_buftarg_get_pag(
+ struct xfs_buftarg *btp,
+ const struct xfs_buf_map *map)
+{
+ struct xfs_mount *mp = btp->bt_mount;
+
+ if (xfs_buftarg_is_mem(btp))
+ return NULL;
+ return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
+}
+
+static inline struct xfs_buf_cache *
+xfs_buftarg_buf_cache(
+ struct xfs_buftarg *btp,
+ struct xfs_perag *pag)
+{
+ if (pag)
+ return &pag->pag_bcache;
+ return btp->bt_cache;
+}
+
/*
* Assembles a buffer covering the specified range. The code is optimised for
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
@@ -692,6 +721,7 @@ xfs_buf_get_map(
xfs_buf_flags_t flags,
struct xfs_buf **bpp)
{
+ struct xfs_buf_cache *bch;
struct xfs_perag *pag;
struct xfs_buf *bp = NULL;
struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
@@ -707,10 +737,10 @@ xfs_buf_get_map(
if (error)
return error;
- pag = xfs_perag_get(btp->bt_mount,
- xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
+ pag = xfs_buftarg_get_pag(btp, &cmap);
+ bch = xfs_buftarg_buf_cache(btp, pag);
- error = xfs_buf_lookup(pag, &cmap, flags, &bp);
+ error = xfs_buf_lookup(bch, &cmap, flags, &bp);
if (error && error != -ENOENT)
goto out_put_perag;
@@ -722,13 +752,14 @@ xfs_buf_get_map(
goto out_put_perag;
/* xfs_buf_find_insert() consumes the perag reference. */
- error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
+ error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
flags, &bp);
if (error)
return error;
} else {
XFS_STATS_INC(btp->bt_mount, xb_get_locked);
- xfs_perag_put(pag);
+ if (pag)
+ xfs_perag_put(pag);
}
/* We do not hold a perag reference anymore. */
@@ -756,7 +787,8 @@ xfs_buf_get_map(
return 0;
out_put_perag:
- xfs_perag_put(pag);
+ if (pag)
+ xfs_perag_put(pag);
return error;
}
@@ -903,6 +935,13 @@ xfs_buf_readahead_map(
{
struct xfs_buf *bp;
+ /*
+ * Currently we don't have a good means or justification for performing
+ * xmbuf_map_page asynchronously, so we don't do readahead.
+ */
+ if (xfs_buftarg_is_mem(target))
+ return;
+
xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
__this_address);
@@ -968,7 +1007,10 @@ xfs_buf_get_uncached(
if (error)
return error;
- error = xfs_buf_alloc_pages(bp, flags);
+ if (xfs_buftarg_is_mem(bp->b_target))
+ error = xmbuf_map_page(bp);
+ else
+ error = xfs_buf_alloc_pages(bp, flags);
if (error)
goto fail_free_buf;
@@ -1016,7 +1058,9 @@ static void
xfs_buf_rele_cached(
struct xfs_buf *bp)
{
+ struct xfs_buftarg *btp = bp->b_target;
struct xfs_perag *pag = bp->b_pag;
+ struct xfs_buf_cache *bch = xfs_buftarg_buf_cache(btp, pag);
bool release;
bool freebuf = false;
@@ -1035,7 +1079,7 @@ xfs_buf_rele_cached(
* leading to a use-after-free scenario.
*/
spin_lock(&bp->b_lock);
- release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+ release = atomic_dec_and_lock(&bp->b_hold, &bch->bc_lock);
if (!release) {
/*
* Drop the in-flight state if the buffer is already on the LRU
@@ -1056,11 +1100,11 @@ xfs_buf_rele_cached(
* buffer for the LRU and clear the (now stale) dispose list
* state flag
*/
- if (list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru)) {
+ if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru)) {
bp->b_state &= ~XFS_BSTATE_DISPOSE;
atomic_inc(&bp->b_hold);
}
- spin_unlock(&pag->pag_buf_lock);
+ spin_unlock(&bch->bc_lock);
} else {
/*
* most of the time buffers will already be removed from the
@@ -1069,16 +1113,17 @@ xfs_buf_rele_cached(
* was on was the disposal list
*/
if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
- list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
+ list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
} else {
ASSERT(list_empty(&bp->b_lru));
}
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
- rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
- xfs_buf_hash_params);
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
+ rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
+ xfs_buf_hash_params);
+ spin_unlock(&bch->bc_lock);
+ if (pag)
+ xfs_perag_put(pag);
freebuf = true;
}
@@ -1607,6 +1652,12 @@ _xfs_buf_ioapply(
/* we only use the buffer cache for meta-data */
op |= REQ_META;
+ /* in-memory targets are directly mapped, no IO required. */
+ if (xfs_buftarg_is_mem(bp->b_target)) {
+ xfs_buf_ioend(bp);
+ return;
+ }
+
/*
* Walk all the vectors issuing IO on them. Set up the initial offset
* into the buffer and the desired IO size before we start -
@@ -1962,19 +2013,24 @@ xfs_buftarg_shrink_count(
}
void
-xfs_free_buftarg(
+xfs_destroy_buftarg(
struct xfs_buftarg *btp)
{
shrinker_free(btp->bt_shrinker);
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
percpu_counter_destroy(&btp->bt_io_count);
list_lru_destroy(&btp->bt_lru);
+}
+void
+xfs_free_buftarg(
+ struct xfs_buftarg *btp)
+{
+ xfs_destroy_buftarg(btp);
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
bdev_release(btp->bt_bdev_handle);
-
kfree(btp);
}
@@ -1997,6 +2053,45 @@ xfs_setsize_buftarg(
return 0;
}
+int
+xfs_init_buftarg(
+ struct xfs_buftarg *btp,
+ size_t logical_sectorsize,
+ const char *descr)
+{
+ /* Set up device logical sector size mask */
+ btp->bt_logical_sectorsize = logical_sectorsize;
+ btp->bt_logical_sectormask = logical_sectorsize - 1;
+
+ /*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages
+ * per 30 seconds so as to not spam logs too much on repeated errors.
+ */
+ ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
+ DEFAULT_RATELIMIT_BURST);
+
+ if (list_lru_init(&btp->bt_lru))
+ return -ENOMEM;
+ if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
+ goto out_destroy_lru;
+
+ btp->bt_shrinker =
+ shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", descr);
+ if (!btp->bt_shrinker)
+ goto out_destroy_io_count;
+ btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
+ btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
+ btp->bt_shrinker->private_data = btp;
+ shrinker_register(btp->bt_shrinker);
+ return 0;
+
+out_destroy_io_count:
+ percpu_counter_destroy(&btp->bt_io_count);
+out_destroy_lru:
+ list_lru_destroy(&btp->bt_lru);
+ return -ENOMEM;
+}
+
struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
@@ -2023,41 +2118,12 @@ xfs_alloc_buftarg(
*/
if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
goto error_free;
-
- /* Set up device logical sector size mask */
- btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
- btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
-
- /*
- * Buffer IO error rate limiting. Limit it to no more than 10 messages
- * per 30 seconds so as to not spam logs too much on repeated errors.
- */
- ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
- DEFAULT_RATELIMIT_BURST);
-
- if (list_lru_init(&btp->bt_lru))
+ if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
+ mp->m_super->s_id))
goto error_free;
- if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
- goto error_lru;
-
- btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s",
- mp->m_super->s_id);
- if (!btp->bt_shrinker)
- goto error_pcpu;
-
- btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
- btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
- btp->bt_shrinker->private_data = btp;
-
- shrinker_register(btp->bt_shrinker);
-
return btp;
-error_pcpu:
- percpu_counter_destroy(&btp->bt_io_count);
-error_lru:
- list_lru_destroy(&btp->bt_lru);
error_free:
kfree(btp);
return NULL;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b9216dee7810..73249abca968 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -83,6 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
+struct xfs_buf_cache {
+ spinlock_t bc_lock;
+ struct rhashtable bc_hash;
+};
+
+int xfs_buf_cache_init(struct xfs_buf_cache *bch);
+void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
+
/*
* The xfs_buftarg contains 2 notions of "sector size" -
*
@@ -101,6 +109,7 @@ struct xfs_buftarg {
struct bdev_handle *bt_bdev_handle;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
+ struct file *bt_file;
u64 bt_dax_part_off;
struct xfs_mount *bt_mount;
unsigned int bt_meta_sectorsize;
@@ -114,6 +123,9 @@ struct xfs_buftarg {
struct percpu_counter bt_io_count;
struct ratelimit_state bt_ioerror_rl;
+
+ /* built-in cache, if we're not using the perag one */
+ struct xfs_buf_cache bt_cache[];
};
#define XB_PAGES 2
@@ -379,4 +391,9 @@ int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
+/* for xfs_buf_mem.c only: */
+int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
+ const char *descr);
+void xfs_destroy_buftarg(struct xfs_buftarg *btp);
+
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
new file mode 100644
index 000000000000..8ad38c64708e
--- /dev/null
+++ b/fs/xfs/xfs_buf_mem.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_buf.h"
+#include "xfs_buf_mem.h"
+#include "xfs_trace.h"
+#include <linux/shmem_fs.h>
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_error.h"
+
+/*
+ * Buffer Cache for In-Memory Files
+ * ================================
+ *
+ * Online fsck wants to create ephemeral ordered recordsets. The existing
+ * btree infrastructure can do this, but we need the buffer cache to target
+ * memory instead of block devices.
+ *
+ * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
+ * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to
+ * store our staging data. This file is not installed in the file descriptor
+ * table so that user programs cannot access the data, which means that the
+ * xmbuf must be freed with xmbuf_free.
+ *
+ * xmbufs assume that the caller will handle all required concurrency
+ * management; standard vfs locks (freezer and inode) are not taken. Reads
+ * and writes are satisfied directly from the page cache.
+ *
+ * The only supported block size is PAGE_SIZE, and we cannot use highmem.
+ */
+
+/*
+ * shmem files used to back an in-memory buffer cache must not be exposed to
+ * userspace. Upper layers must coordinate access to the one handle returned
+ * by the constructor, so establish a separate lock class for xmbufs to avoid
+ * confusing lockdep.
+ */
+static struct lock_class_key xmbuf_i_mutex_key;
+
+/*
+ * Allocate a buffer cache target for a memory-backed file and set up the
+ * buffer target.
+ */
+int
+xmbuf_alloc(
+ struct xfs_mount *mp,
+ const char *descr,
+ struct xfs_buftarg **btpp)
+{
+ struct file *file;
+ struct inode *inode;
+ struct xfs_buftarg *btp;
+ int error;
+
+ btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
+ if (!btp)
+ return -ENOMEM;
+
+ file = shmem_kernel_file_setup(descr, 0, 0);
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto out_free_btp;
+ }
+ inode = file_inode(file);
+
+ /* private file, private locking */
+ lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
+
+ /*
+ * We don't want to bother with kmapping data during repair, so don't
+ * allow highmem pages to back this mapping.
+ */
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
+
+ /* ensure all writes are below EOF to avoid pagecache zeroing */
+ i_size_write(inode, inode->i_sb->s_maxbytes);
+
+ trace_xmbuf_create(btp);
+
+ error = xfs_buf_cache_init(btp->bt_cache);
+ if (error)
+ goto out_file;
+
+ /* Initialize buffer target */
+ btp->bt_mount = mp;
+ btp->bt_dev = (dev_t)-1U;
+ btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
+ btp->bt_file = file;
+ btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
+ btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
+
+ error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
+ if (error)
+ goto out_bcache;
+
+ *btpp = btp;
+ return 0;
+
+out_bcache:
+ xfs_buf_cache_destroy(btp->bt_cache);
+out_file:
+ fput(file);
+out_free_btp:
+ kfree(btp);
+ return error;
+}
+
+/* Free a buffer cache target for a memory-backed buffer cache. */
+void
+xmbuf_free(
+ struct xfs_buftarg *btp)
+{
+ ASSERT(xfs_buftarg_is_mem(btp));
+ ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
+
+ trace_xmbuf_free(btp);
+
+ xfs_destroy_buftarg(btp);
+ xfs_buf_cache_destroy(btp->bt_cache);
+ fput(btp->bt_file);
+ kfree(btp);
+}
+
+/* Directly map a shmem page into the buffer cache. */
+int
+xmbuf_map_page(
+ struct xfs_buf *bp)
+{
+ struct inode *inode = file_inode(bp->b_target->bt_file);
+ struct folio *folio = NULL;
+ struct page *page;
+ loff_t pos = BBTOB(xfs_buf_daddr(bp));
+ int error;
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ if (bp->b_map_count != 1)
+ return -ENOMEM;
+ if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
+ return -ENOMEM;
+ if (offset_in_page(pos) != 0) {
+ ASSERT(offset_in_page(pos) == 0);
+ return -ENOMEM;
+ }
+
+ error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
+ if (error)
+ return error;
+
+ if (filemap_check_wb_err(inode->i_mapping, 0)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return -EIO;
+ }
+
+ page = folio_file_page(folio, pos >> PAGE_SHIFT);
+
+ /*
+ * Mark the page dirty so that it won't be reclaimed once we drop the
+ * (potentially last) reference in xmbuf_unmap_page.
+ */
+ set_page_dirty(page);
+ unlock_page(page);
+
+ bp->b_addr = page_address(page);
+ bp->b_pages = bp->b_page_array;
+ bp->b_pages[0] = page;
+ bp->b_page_count = 1;
+ return 0;
+}
+
+/* Unmap a shmem page that was mapped into the buffer cache. */
+void
+xmbuf_unmap_page(
+ struct xfs_buf *bp)
+{
+ struct page *page = bp->b_pages[0];
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ put_page(page);
+
+ bp->b_addr = NULL;
+ bp->b_pages[0] = NULL;
+ bp->b_pages = NULL;
+ bp->b_page_count = 0;
+}
+
+/* Is this a valid daddr within the buftarg? */
+bool
+xmbuf_verify_daddr(
+ struct xfs_buftarg *btp,
+ xfs_daddr_t daddr)
+{
+ struct inode *inode = file_inode(btp->bt_file);
+
+ ASSERT(xfs_buftarg_is_mem(btp));
+
+ return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
+}
+
+/* Discard the page backing this buffer. */
+static void
+xmbuf_stale(
+ struct xfs_buf *bp)
+{
+ struct inode *inode = file_inode(bp->b_target->bt_file);
+ loff_t pos;
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ pos = BBTOB(xfs_buf_daddr(bp));
+ shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
+}
+
+/*
+ * Finalize a buffer -- discard the backing page if it's stale, or run the
+ * write verifier to detect problems.
+ */
+int
+xmbuf_finalize(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+ int error = 0;
+
+ if (bp->b_flags & XBF_STALE) {
+ xmbuf_stale(bp);
+ return 0;
+ }
+
+ /*
+ * Although this btree is ephemeral, validate the buffer structure so
+ * that we can detect memory corruption errors and software bugs.
+ */
+ fa = bp->b_ops->verify_struct(bp);
+ if (fa) {
+ error = -EFSCORRUPTED;
+ xfs_verifier_error(bp, error, fa);
+ }
+
+ return error;
+}
+
+/*
+ * Detach this xmbuf buffer from the transaction by any means necessary.
+ * All buffers are direct-mapped, so they do not need bwrite.
+ */
+void
+xmbuf_trans_bdetach(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf_log_item *bli = bp->b_log_item;
+
+ ASSERT(bli != NULL);
+
+ bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
+ XFS_BLI_LOGGED | XFS_BLI_STALE);
+ clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);
+
+ while (bp->b_log_item != NULL)
+ xfs_trans_bdetach(tp, bp);
+}
diff --git a/fs/xfs/xfs_buf_mem.h b/fs/xfs/xfs_buf_mem.h
new file mode 100644
index 000000000000..eed4a7b63232
--- /dev/null
+++ b/fs/xfs/xfs_buf_mem.h
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <[email protected]>
+ */
+#ifndef __XFS_BUF_MEM_H__
+#define __XFS_BUF_MEM_H__
+
+#define XMBUF_BLOCKSIZE (PAGE_SIZE)
+#define XMBUF_BLOCKSHIFT (PAGE_SHIFT)
+
+#ifdef CONFIG_XFS_MEMORY_BUFS
+static inline bool xfs_buftarg_is_mem(const struct xfs_buftarg *btp)
+{
+ return btp->bt_bdev == NULL;
+}
+
+int xmbuf_alloc(struct xfs_mount *mp, const char *descr,
+ struct xfs_buftarg **btpp);
+void xmbuf_free(struct xfs_buftarg *btp);
+
+int xmbuf_map_page(struct xfs_buf *bp);
+void xmbuf_unmap_page(struct xfs_buf *bp);
+bool xmbuf_verify_daddr(struct xfs_buftarg *btp, xfs_daddr_t daddr);
+void xmbuf_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
+int xmbuf_finalize(struct xfs_buf *bp);
+#else
+# define xfs_buftarg_is_mem(...) (false)
+# define xmbuf_map_page(...) (-ENOMEM)
+# define xmbuf_unmap_page(...) ((void)0)
+# define xmbuf_verify_daddr(...) (false)
+#endif /* CONFIG_XFS_MEMORY_BUFS */
+
+#endif /* __XFS_BUF_MEM_H__ */
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index b28546b6fe34..b39f959146bc 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -527,6 +527,9 @@ xfs_btree_mark_sick(
struct xfs_btree_cur *cur)
{
switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ /* no health state tracking for ephemeral btrees */
+ return;
case XFS_BTREE_TYPE_AG:
ASSERT(cur->bc_ops->sick_mask);
xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6c44e6db4d86..e880aa48de68 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -505,9 +505,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
-int xfs_buf_hash_init(struct xfs_perag *pag);
-void xfs_buf_hash_destroy(struct xfs_perag *pag);
-
extern void xfs_uuid_table_free(void);
extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 8a5dc1538aa8..3f253884fe5b 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -36,6 +36,8 @@
#include "xfs_error.h"
#include <linux/iomap.h>
#include "xfs_iomap.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e876a47f1427..e85ea1fdb9c1 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -79,6 +79,8 @@ union xfs_btree_ptr;
struct xfs_dqtrx;
struct xfs_icwalk;
struct xfs_perag;
+struct xfbtree;
+struct xfs_btree_ops;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -640,6 +642,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
@@ -2499,12 +2502,19 @@ TRACE_EVENT(xfs_btree_alloc_block,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_INODE:
__entry->agno = 0;
__entry->ino = cur->bc_ino.ip->i_ino;
- } else {
+ break;
+ case XFS_BTREE_TYPE_AG:
__entry->agno = cur->bc_ag.pag->pag_agno;
__entry->ino = 0;
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ __entry->agno = 0;
+ __entry->ino = 0;
+ break;
}
__assign_str(name, cur->bc_ops->name);
__entry->error = error;
@@ -4514,6 +4524,159 @@ DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
#endif /* CONFIG_XFS_DRAIN_INTENTS */
+#ifdef CONFIG_XFS_MEMORY_BUFS
+TRACE_EVENT(xmbuf_create,
+ TP_PROTO(struct xfs_buftarg *btp),
+ TP_ARGS(btp),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long, ino)
+ __array(char, pathname, 256)
+ ),
+ TP_fast_assign(
+ char pathname[257];
+ char *path;
+ struct file *file = btp->bt_file;
+
+ __entry->ino = file_inode(file)->i_ino;
+ memset(pathname, 0, sizeof(pathname));
+ path = file_path(file, pathname, sizeof(pathname) - 1);
+ if (IS_ERR(path))
+ path = "(unknown)";
+ strncpy(__entry->pathname, path, sizeof(__entry->pathname));
+ ),
+ TP_printk("xmino 0x%lx path '%s'",
+ __entry->ino,
+ __entry->pathname)
+);
+
+TRACE_EVENT(xmbuf_free,
+ TP_PROTO(struct xfs_buftarg *btp),
+ TP_ARGS(btp),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long long, bytes)
+ __field(loff_t, size)
+ ),
+ TP_fast_assign(
+ struct file *file = btp->bt_file;
+ struct inode *inode = file_inode(file);
+
+ __entry->size = i_size_read(inode);
+ __entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes;
+ __entry->ino = inode->i_ino;
+ ),
+ TP_printk("xmino 0x%lx mem_bytes 0x%llx isize 0x%llx",
+ __entry->ino,
+ __entry->bytes,
+ __entry->size)
+);
+#endif /* CONFIG_XFS_MEMORY_BUFS */
+
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+TRACE_EVENT(xfbtree_init,
+ TP_PROTO(struct xfs_mount *mp, struct xfbtree *xfbt,
+ const struct xfs_btree_ops *ops),
+ TP_ARGS(mp, xfbt, ops),
+ TP_STRUCT__entry(
+ __field(const void *, btree_ops)
+ __field(unsigned long, xfino)
+ __field(unsigned int, leaf_mxr)
+ __field(unsigned int, leaf_mnr)
+ __field(unsigned int, node_mxr)
+ __field(unsigned int, node_mnr)
+ __field(unsigned long long, owner)
+ ),
+ TP_fast_assign(
+ __entry->btree_ops = ops;
+ __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
+ __entry->leaf_mxr = xfbt->maxrecs[0];
+ __entry->node_mxr = xfbt->maxrecs[1];
+ __entry->leaf_mnr = xfbt->minrecs[0];
+ __entry->node_mnr = xfbt->minrecs[1];
+ __entry->owner = xfbt->owner;
+ ),
+ TP_printk("xfino 0x%lx btree_ops %pS owner 0x%llx leaf_mxr %u leaf_mnr %u node_mxr %u node_mnr %u",
+ __entry->xfino,
+ __entry->btree_ops,
+ __entry->owner,
+ __entry->leaf_mxr,
+ __entry->leaf_mnr,
+ __entry->node_mxr,
+ __entry->node_mnr)
+);
+
+DECLARE_EVENT_CLASS(xfbtree_buf_class,
+ TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp),
+ TP_ARGS(xfbt, bp),
+ TP_STRUCT__entry(
+ __field(unsigned long, xfino)
+ __field(xfs_daddr_t, bno)
+ __field(int, nblks)
+ __field(int, hold)
+ __field(int, pincount)
+ __field(unsigned int, lockval)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
+ __entry->bno = xfs_buf_daddr(bp);
+ __entry->nblks = bp->b_length;
+ __entry->hold = atomic_read(&bp->b_hold);
+ __entry->pincount = atomic_read(&bp->b_pin_count);
+ __entry->lockval = bp->b_sema.count;
+ __entry->flags = bp->b_flags;
+ ),
+ TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
+ __entry->xfino,
+ (unsigned long long)__entry->bno,
+ __entry->nblks,
+ __entry->hold,
+ __entry->pincount,
+ __entry->lockval,
+ __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
+)
+
+#define DEFINE_XFBTREE_BUF_EVENT(name) \
+DEFINE_EVENT(xfbtree_buf_class, name, \
+ TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), \
+ TP_ARGS(xfbt, bp))
+DEFINE_XFBTREE_BUF_EVENT(xfbtree_create_root_buf);
+DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_commit_buf);
+DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_cancel_buf);
+
+DECLARE_EVENT_CLASS(xfbtree_freesp_class,
+ TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur,
+ xfs_fileoff_t fileoff),
+ TP_ARGS(xfbt, cur, fileoff),
+ TP_STRUCT__entry(
+ __field(unsigned long, xfino)
+ __string(btname, cur->bc_ops->name)
+ __field(int, nlevels)
+ __field(xfs_fileoff_t, fileoff)
+ ),
+ TP_fast_assign(
+ __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
+ __assign_str(btname, cur->bc_ops->name);
+ __entry->nlevels = cur->bc_nlevels;
+ __entry->fileoff = fileoff;
+ ),
+ TP_printk("xfino 0x%lx %sbt nlevels %d fileoff 0x%llx",
+ __entry->xfino,
+ __get_str(btname),
+ __entry->nlevels,
+ (unsigned long long)__entry->fileoff)
+)
+
+#define DEFINE_XFBTREE_FREESP_EVENT(name) \
+DEFINE_EVENT(xfbtree_freesp_class, name, \
+ TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, \
+ xfs_fileoff_t fileoff), \
+ TP_ARGS(xfbt, cur, fileoff))
+DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block);
+DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block);
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 08ce757c7454..3f7e3a09a49f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -215,6 +215,7 @@ struct xfs_buf *xfs_trans_getsb(struct xfs_trans *);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
+void xfs_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 6549e50d852c..e28ab74af4f0 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -393,6 +393,48 @@ xfs_trans_brelse(
}
/*
+ * Forcibly detach a buffer previously joined to the transaction. The caller
+ * will retain its locked reference to the buffer after this function returns.
+ * The buffer must be completely clean and must not be held to the transaction.
+ */
+void
+xfs_trans_bdetach(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+
+ ASSERT(tp != NULL);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
+ trace_xfs_trans_bdetach(bip);
+
+ /*
+ * Erase all recursion count, since we're removing this buffer from the
+ * transaction.
+ */
+ bip->bli_recur = 0;
+
+ /*
+ * The buffer must be completely clean. Specifically, it had better
+ * not be dirty, stale, logged, ordered, or held to the transaction.
+ */
+ ASSERT(!test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags));
+ ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY));
+ ASSERT(!(bip->bli_flags & XFS_BLI_HOLD));
+ ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
+ ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
+ ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+
+ /* Unlink the log item from the transaction and drop the log item. */
+ xfs_trans_del_item(&bip->bli_item);
+ xfs_buf_item_put(bip);
+ bp->b_transp = NULL;
+}
+
+/*
* Mark the buffer as not needing to be unlocked when the buf item's
* iop_committing() routine is called. The buffer must already be locked
* and associated with the given transaction.