aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/reada.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/reada.c')
-rw-r--r-- fs/btrfs/reada.c | 81
1 files changed, 72 insertions, 9 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 9d4f5316a7e8..20fd4aa48a8c 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -52,6 +52,7 @@ struct reada_extctl {
struct reada_extent {
u64 logical;
+ u64 owner_root;
struct btrfs_key top;
struct list_head extctl;
int refcnt;
@@ -59,6 +60,7 @@ struct reada_extent {
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
int scheduled;
+ int level;
};
struct reada_zone {
@@ -87,7 +89,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, u64 generation);
+ struct btrfs_key *top, u64 owner_root,
+ u64 generation, int level);
/* recurses */
/* in case of err, eb might be NULL */
@@ -165,7 +168,9 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info,
if (rec->generation == generation &&
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
- reada_add_block(rc, bytenr, &next_key, n_gen);
+ reada_add_block(rc, bytenr, &next_key,
+ btrfs_header_owner(eb), n_gen,
+ btrfs_header_level(eb) - 1);
}
}
@@ -298,7 +303,8 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
u64 logical,
- struct btrfs_key *top)
+ struct btrfs_key *top,
+ u64 owner_root, int level)
{
int ret;
struct reada_extent *re = NULL;
@@ -331,6 +337,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
re->refcnt = 1;
+ re->owner_root = owner_root;
+ re->level = level;
/*
* map block
@@ -421,6 +429,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!dev->bdev)
continue;
+ if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
+ continue;
+
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@@ -445,6 +456,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
have_zone = 1;
}
+ if (!have_zone)
+ radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
up_read(&fs_info->dev_replace.rwsem);
@@ -526,6 +539,8 @@ static void reada_zone_release(struct kref *kref)
{
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
+ lockdep_assert_held(&zone->device->fs_info->reada_lock);
+
radix_tree_delete(&zone->device->reada_zones,
zone->end >> PAGE_SHIFT);
@@ -541,14 +556,15 @@ static void reada_control_release(struct kref *kref)
}
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, u64 generation)
+ struct btrfs_key *top, u64 owner_root,
+ u64 generation, int level)
{
struct btrfs_fs_info *fs_info = rc->fs_info;
struct reada_extent *re;
struct reada_extctl *rec;
/* takes one ref */
- re = reada_find_extent(fs_info, logical, top);
+ re = reada_find_extent(fs_info, logical, top, owner_root, level);
if (!re)
return -1;
@@ -640,12 +656,13 @@ static int reada_pick_zone(struct btrfs_device *dev)
}
static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
- int mirror_num, struct extent_buffer **eb)
+ u64 owner_root, int level, int mirror_num,
+ struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
int ret;
- buf = btrfs_find_create_tree_block(fs_info, bytenr);
+ buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
if (IS_ERR(buf))
return 0;
@@ -733,7 +750,8 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
logical = re->logical;
atomic_inc(&dev->reada_in_flight);
- ret = reada_tree_block_flagged(fs_info, logical, mirror_num, &eb);
+ ret = reada_tree_block_flagged(fs_info, logical, re->owner_root,
+ re->level, mirror_num, &eb);
if (ret)
__readahead_hook(fs_info, re, NULL, ret);
else if (eb)
@@ -940,6 +958,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
u64 start;
u64 generation;
int ret;
+ int level;
struct extent_buffer *node;
static struct btrfs_key max_key = {
.objectid = (u64)-1,
@@ -962,9 +981,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
node = btrfs_root_node(root);
start = node->start;
generation = btrfs_header_generation(node);
+ level = btrfs_header_level(node);
free_extent_buffer(node);
- ret = reada_add_block(rc, start, &max_key, generation);
+ ret = reada_add_block(rc, start, &max_key, root->root_key.objectid,
+ generation, level);
if (ret) {
kfree(rc);
return ERR_PTR(ret);
@@ -1020,3 +1041,45 @@ void btrfs_reada_detach(void *handle)
kref_put(&rc->refcnt, reada_control_release);
}
+
+/*
+ * Call this before removing a device (device replace or device remove
+ * ioctls). It flags the device with BTRFS_DEV_STATE_NO_READA so that
+ * reada_find_extent() skips it, then starts and waits for pending readahead.
+ *
+ * Must be called without holding either the device list mutex or the device
+ * replace semaphore, otherwise it will deadlock.
+ */
+void btrfs_reada_remove_dev(struct btrfs_device *dev)
+{
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+
+ /* Serialize with readahead extent creation at reada_find_extent(). */
+ spin_lock(&fs_info->reada_lock);
+ set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&fs_info->reada_lock);
+
+ /*
+ * There might be readahead requests added to the radix trees which
+ * were not yet added to the readahead work queue. We need to start
+ * them and wait for their completion, otherwise we can end up with
+ * use-after-free problems when dropping the last reference on the
+ * readahead extents and their zones, as they need to access the
+ * device structure.
+ */
+ reada_start_machine(fs_info);
+ btrfs_flush_workqueue(fs_info->readahead_workers);
+}
+
+/*
+ * If an error happens while removing a device (device replace or device remove
+ * ioctls) after btrfs_reada_remove_dev() was called, call this to clear the
+ * BTRFS_DEV_STATE_NO_READA bit it set. This is safe to call even if
+ * btrfs_reada_remove_dev() was not called before.
+ */
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
+{
+ spin_lock(&dev->fs_info->reada_lock);
+ clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&dev->fs_info->reada_lock);
+}