aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 159
1 files changed, 106 insertions, 53 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9a726ded2c6d..54bc8c7c6bcd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -101,7 +101,7 @@ int __init btrfs_end_io_wq_init(void)
btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
sizeof(struct btrfs_end_io_wq),
0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ SLAB_MEM_SPREAD,
NULL);
if (!btrfs_end_io_wq_cache)
return -ENOMEM;
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
u32 nritems = btrfs_header_nritems(leaf);
int slot;
- if (nritems == 0)
+ if (nritems == 0) {
+ struct btrfs_root *check_root;
+
+ key.objectid = btrfs_header_owner(leaf);
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots have not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ /* if leaf is the root, then it's fine */
+ if (leaf->start !=
+ btrfs_root_bytenr(&check_root->root_item)) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, root, 0);
+ return -EIO;
+ }
+ }
return 0;
+ }
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
+static int check_node(struct btrfs_root *root, struct extent_buffer *node) /* returns 0 when nritems is in range, -EIO otherwise */
+{
+ unsigned long nr = btrfs_header_nritems(node);
+
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { /* reject an empty node or a count above BTRFS_NODEPTRS_PER_BLOCK(root) */
+ btrfs_crit(root->fs_info,
+ "corrupt node: block %llu root %llu nritems %lu",
+ node->start, root->objectid, nr);
+ return -EIO;
+ }
+ return 0;
+}
+
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
}
+ if (found_level > 0 && check_node(root, eb))
+ ret = -EIO;
+
if (!ret)
set_extent_buffer_uptodate(eb);
err:
@@ -870,7 +907,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
atomic_inc(&fs_info->nr_async_submits);
- if (bio->bi_rw & REQ_SYNC)
+ if (bio->bi_opf & REQ_SYNC)
btrfs_set_work_high_priority(&async->work);
btrfs_queue_work(fs_info->workers, &async->work);
@@ -1140,7 +1177,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
- if (btrfs_test_is_dummy_root(root))
+ if (btrfs_is_testing(root->fs_info))
return alloc_test_extent_buffer(root->fs_info, bytenr,
root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
@@ -1227,6 +1264,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
+ bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
root->node = NULL;
root->commit_root = NULL;
root->sectorsize = sectorsize;
@@ -1281,14 +1319,14 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
- if (fs_info)
+ if (!dummy)
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
- if (fs_info)
+ if (!dummy)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
@@ -1309,17 +1347,20 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
+struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL, GFP_KERNEL);
+ if (!fs_info)
+ return ERR_PTR(-EINVAL);
+
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
/* We don't use the stripesize in selftest, set it as sectorsize */
- __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ __setup_root(nodesize, sectorsize, sectorsize, root, fs_info,
BTRFS_ROOT_TREE_OBJECTID);
- set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
return root;
@@ -1594,14 +1635,14 @@ int btrfs_init_fs_root(struct btrfs_root *root)
ret = get_anon_bdev(&root->anon_dev);
if (ret)
- goto free_writers;
+ goto fail;
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
&root->highest_objectid);
if (ret) {
mutex_unlock(&root->objectid_mutex);
- goto free_root_dev;
+ goto fail;
}
ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
@@ -1609,19 +1650,13 @@ int btrfs_init_fs_root(struct btrfs_root *root)
mutex_unlock(&root->objectid_mutex);
return 0;
-
-free_root_dev:
- free_anon_bdev(root->anon_dev);
-free_writers:
- btrfs_free_subvolume_writers(root->subv_writers);
fail:
- kfree(root->free_ino_ctl);
- kfree(root->free_ino_pinned);
+ /* the caller is responsible for calling free_fs_root */
return ret;
}
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
- u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+ u64 root_id)
{
struct btrfs_root *root;
@@ -2300,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
fs_info->qgroup_ulist = NULL;
+ fs_info->qgroup_rescan_running = false;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -2310,17 +2346,19 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
fs_info->workers =
- btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
- max_active, 16);
+ btrfs_alloc_workqueue(fs_info, "worker",
+ flags | WQ_HIGHPRI, max_active, 16);
fs_info->delalloc_workers =
- btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "delalloc",
+ flags, max_active, 2);
fs_info->flush_workers =
- btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "flush_delalloc",
+ flags, max_active, 0);
fs_info->caching_workers =
- btrfs_alloc_workqueue("cache", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
/*
* a higher idle thresh on the submit workers makes it much more
@@ -2328,41 +2366,48 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
* devices
*/
fs_info->submit_workers =
- btrfs_alloc_workqueue("submit", flags,
+ btrfs_alloc_workqueue(fs_info, "submit", flags,
min_t(u64, fs_devices->num_devices,
max_active), 64);
fs_info->fixup_workers =
- btrfs_alloc_workqueue("fixup", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
/*
* endios are largely parallel and should have a very
* low idle thresh
*/
fs_info->endio_workers =
- btrfs_alloc_workqueue("endio", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
fs_info->endio_meta_workers =
- btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
+ max_active, 4);
fs_info->endio_meta_write_workers =
- btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags,
+ max_active, 2);
fs_info->endio_raid56_workers =
- btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+ btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
+ max_active, 4);
fs_info->endio_repair_workers =
- btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
- btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
- btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "endio-write", flags,
+ max_active, 2);
fs_info->endio_freespace_worker =
- btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
+ max_active, 0);
fs_info->delayed_workers =
- btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+ btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
+ max_active, 0);
fs_info->readahead_workers =
- btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+ btrfs_alloc_workqueue(fs_info, "readahead", flags,
+ max_active, 2);
fs_info->qgroup_rescan_workers =
- btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+ btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
fs_info->extent_workers =
- btrfs_alloc_workqueue("extent-refs", flags,
+ btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
min_t(u64, fs_devices->num_devices,
max_active), 8);
@@ -2617,6 +2662,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
+ fs_info->fs_frozen = 0;
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info->metadata_ratio = 0;
@@ -3010,8 +3056,8 @@ retry_root_backup:
if (IS_ERR(fs_info->transaction_kthread))
goto fail_cleaner;
- if (!btrfs_test_opt(tree_root, SSD) &&
- !btrfs_test_opt(tree_root, NOSSD) &&
+ if (!btrfs_test_opt(tree_root->fs_info, SSD) &&
+ !btrfs_test_opt(tree_root->fs_info, NOSSD) &&
!fs_info->fs_devices->rotating) {
btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
btrfs_set_opt(fs_info->mount_opt, SSD);
@@ -3024,9 +3070,9 @@ retry_root_backup:
btrfs_apply_pending_changes(fs_info);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+ if (btrfs_test_opt(tree_root->fs_info, CHECK_INTEGRITY)) {
ret = btrfsic_mount(tree_root, fs_devices,
- btrfs_test_opt(tree_root,
+ btrfs_test_opt(tree_root->fs_info,
CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
1 : 0,
fs_info->check_integrity_print_mask);
@@ -3042,7 +3088,7 @@ retry_root_backup:
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
- !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
+ !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3083,7 +3129,7 @@ retry_root_backup:
if (sb->s_flags & MS_RDONLY)
return 0;
- if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "creating free space tree");
ret = btrfs_create_free_space_tree(fs_info);
@@ -3120,7 +3166,7 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
- if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_info(fs_info, "clearing free space tree");
ret = btrfs_clear_free_space_tree(fs_info);
@@ -3141,7 +3187,7 @@ retry_root_backup:
close_ctree(tree_root);
return ret;
}
- } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+ } else if (btrfs_test_opt(tree_root->fs_info, RESCAN_UUID_TREE) ||
fs_info->generation !=
btrfs_super_uuid_tree_generation(disk_super)) {
btrfs_info(fs_info, "checking UUID tree");
@@ -3218,7 +3264,7 @@ fail:
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
+ if (!btrfs_test_opt(tree_root->fs_info, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
@@ -3634,7 +3680,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int total_errors = 0;
u64 flags;
- do_barriers = !btrfs_test_opt(root, NOBARRIER);
+ do_barriers = !btrfs_test_opt(root->fs_info, NOBARRIER);
backup_super_roots(root->fs_info);
sb = root->fs_info->super_for_commit;
@@ -3732,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
btrfs_free_log(NULL, root);
+ if (root->reloc_root) {
+ free_extent_buffer(root->reloc_root->node);
+ free_extent_buffer(root->reloc_root->commit_root);
+ btrfs_put_fs_root(root->reloc_root);
+ root->reloc_root = NULL;
+ }
+ }
if (root->free_ino_pinned)
__btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3844,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
smp_mb();
/* wait for the qgroup rescan worker to stop */
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
@@ -3918,7 +3971,7 @@ void close_ctree(struct btrfs_root *root)
iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ if (btrfs_test_opt(root->fs_info, CHECK_INTEGRITY))
btrfsic_unmount(root, fs_info->fs_devices);
#endif