aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_btree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_btree.c')
-rw-r--r--fs/xfs/libxfs/xfs_btree.c25
1 files changed, 22 insertions, 3 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 4c16c8c31fcb..c4649cc624e1 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2913,9 +2913,22 @@ xfs_btree_split_worker(
}
/*
- * BMBT split requests often come in with little stack to work on. Push
+ * BMBT split requests often come in with little stack to work on so we push
* them off to a worker thread so there is lots of stack to use. For the other
* btree types, just call directly to avoid the context switch overhead here.
+ *
+ * Care must be taken here - the work queue rescuer thread introduces potential
+ * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
+ * AGFs to allocate blocks. A task being run by the rescuer could attempt to
+ * lock an AGF that is already locked by a task queued to run by the rescuer,
+ * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
+ * release it until the current thread it is running gains the lock.
+ *
+ * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
+ * already locked to allocate from. The only place that doesn't hold an AGF
+ * locked is unwritten extent conversion at IO completion, but that has already
+ * been offloaded to a worker thread and hence has no stack consumption issues
+ * we have to worry about.
*/
STATIC int /* error */
xfs_btree_split(
@@ -2929,7 +2942,8 @@ xfs_btree_split(
struct xfs_btree_split_args args;
DECLARE_COMPLETION_ONSTACK(done);
- if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ if (cur->bc_btnum != XFS_BTNUM_BMAP ||
+ cur->bc_tp->t_highest_agno == NULLAGNUMBER)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
args.cur = cur;
@@ -4666,7 +4680,12 @@ xfs_btree_space_to_height(
const unsigned int *limits,
unsigned long long leaf_blocks)
{
- unsigned long long node_blocks = limits[1];
+ /*
+ * The root btree block can have fewer than minrecs pointers in it
+ * because the tree might not be big enough to require that amount of
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
+ */
+ unsigned long long node_blocks = 2;
unsigned long long blocks_left = leaf_blocks - 1;
unsigned int height = 1;