diff options
Diffstat (limited to 'fs/btrfs/reada.c')
-rw-r--r-- | fs/btrfs/reada.c | 81 |
1 files changed, 72 insertions, 9 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 9d4f5316a7e8..20fd4aa48a8c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -52,6 +52,7 @@ struct reada_extctl { struct reada_extent { u64 logical; + u64 owner_root; struct btrfs_key top; struct list_head extctl; int refcnt; @@ -59,6 +60,7 @@ struct reada_extent { struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; int scheduled; + int level; }; struct reada_zone { @@ -87,7 +89,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); static void __reada_start_machine(struct btrfs_fs_info *fs_info); static int reada_add_block(struct reada_control *rc, u64 logical, - struct btrfs_key *top, u64 generation); + struct btrfs_key *top, u64 owner_root, + u64 generation, int level); /* recurses */ /* in case of err, eb might be NULL */ @@ -165,7 +168,9 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info, if (rec->generation == generation && btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) - reada_add_block(rc, bytenr, &next_key, n_gen); + reada_add_block(rc, bytenr, &next_key, + btrfs_header_owner(eb), n_gen, + btrfs_header_level(eb) - 1); } } @@ -298,7 +303,8 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, u64 logical, - struct btrfs_key *top) + struct btrfs_key *top, + u64 owner_root, int level) { int ret; struct reada_extent *re = NULL; @@ -331,6 +337,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&re->extctl); spin_lock_init(&re->lock); re->refcnt = 1; + re->owner_root = owner_root; + re->level = level; /* * map block @@ -421,6 +429,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!dev->bdev) continue; + if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state)) + continue; + if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* @@ -445,6 +456,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, } have_zone = 1; } + if (!have_zone) + radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); up_read(&fs_info->dev_replace.rwsem); @@ -526,6 +539,8 @@ static void reada_zone_release(struct kref *kref) { struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); + lockdep_assert_held(&zone->device->fs_info->reada_lock); + radix_tree_delete(&zone->device->reada_zones, zone->end >> PAGE_SHIFT); @@ -541,14 +556,15 @@ static void reada_control_release(struct kref *kref) } static int reada_add_block(struct reada_control *rc, u64 logical, - struct btrfs_key *top, u64 generation) + struct btrfs_key *top, u64 owner_root, + u64 generation, int level) { struct btrfs_fs_info *fs_info = rc->fs_info; struct reada_extent *re; struct reada_extctl *rec; /* takes one ref */ - re = reada_find_extent(fs_info, logical, top); + re = reada_find_extent(fs_info, logical, top, owner_root, level); if (!re) return -1; @@ -640,12 +656,13 @@ static int reada_pick_zone(struct btrfs_device *dev) } static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, - int mirror_num, struct extent_buffer **eb) + u64 owner_root, int level, int mirror_num, + struct extent_buffer **eb) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(fs_info, bytenr); + buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level); if (IS_ERR(buf)) return 0; @@ -733,7 +750,8 @@ static int reada_start_machine_dev(struct btrfs_device *dev) logical = re->logical; atomic_inc(&dev->reada_in_flight); - ret = reada_tree_block_flagged(fs_info, logical, mirror_num, &eb); + ret = reada_tree_block_flagged(fs_info, logical, re->owner_root, + re->level, mirror_num, &eb); if (ret) __readahead_hook(fs_info, re, NULL, ret); else if (eb) @@ -940,6 +958,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, u64 start; u64 generation; int ret; + int level; struct extent_buffer *node; static struct btrfs_key max_key = { .objectid = (u64)-1, @@ -962,9 +981,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, node = btrfs_root_node(root); start = node->start; generation = btrfs_header_generation(node); + level = btrfs_header_level(node); free_extent_buffer(node); - ret = reada_add_block(rc, start, &max_key, generation); + ret = reada_add_block(rc, start, &max_key, root->root_key.objectid, + generation, level); if (ret) { kfree(rc); return ERR_PTR(ret); @@ -1020,3 +1041,45 @@ void btrfs_reada_detach(void *handle) kref_put(&rc->refcnt, reada_control_release); } + +/* + * Before removing a device (device replace or device remove ioctls), call this + * function to wait for all existing readahead requests on the device and to + * make sure no one queues more readahead requests for the device. + * + * Must be called without holding neither the device list mutex nor the device + * replace semaphore, otherwise it will deadlock. + */ +void btrfs_reada_remove_dev(struct btrfs_device *dev) +{ + struct btrfs_fs_info *fs_info = dev->fs_info; + + /* Serialize with readahead extent creation at reada_find_extent(). */ + spin_lock(&fs_info->reada_lock); + set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&fs_info->reada_lock); + + /* + * There might be readahead requests added to the radix trees which + * were not yet added to the readahead work queue. We need to start + * them and wait for their completion, otherwise we can end up with + * use-after-free problems when dropping the last reference on the + * readahead extents and their zones, as they need to access the + * device structure. + */ + reada_start_machine(fs_info); + btrfs_flush_workqueue(fs_info->readahead_workers); +} + +/* + * If when removing a device (device replace or device remove ioctls) an error + * happens after calling btrfs_reada_remove_dev(), call this to undo what that + * function did. This is safe to call even if btrfs_reada_remove_dev() was not + * called before. + */ +void btrfs_reada_undo_remove_dev(struct btrfs_device *dev) +{ + spin_lock(&dev->fs_info->reada_lock); + clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&dev->fs_info->reada_lock); +} |