From b6a535faed06c2bdfaf55b00025dfdcb1eadf980 Mon Sep 17 00:00:00 2001 From: Howard McLauchlan Date: Fri, 2 Feb 2018 11:09:01 -0800 Subject: btrfs: print error if primary super block write fails Presently, failing a primary super block write but succeeding in at least one super block write in general will appear to users as if nothing important went wrong. However, upon unmounting and re-mounting, the file system will be in a rolled back state. This was discovered with a BCC program that uses bpf_override_return() to fail super block writes. This patch outputs an error clarifying that the primary super block write has failed, so users can expect potentially erroneous behaviour. It also forces wait_dev_supers() to return an error to its caller if the primary super block write fails. Signed-off-by: Howard McLauchlan Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 21f34ad0d411..9f7ed4390b33 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3290,6 +3290,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) struct buffer_head *bh; int i; int errors = 0; + bool primary_failed = false; u64 bytenr; if (max_mirrors == 0) @@ -3306,11 +3307,16 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) BTRFS_SUPER_INFO_SIZE); if (!bh) { errors++; + if (i == 0) + primary_failed = true; continue; } wait_on_buffer(bh); - if (!buffer_uptodate(bh)) + if (!buffer_uptodate(bh)) { errors++; + if (i == 0) + primary_failed = true; + } /* drop our reference */ brelse(bh); @@ -3319,6 +3325,13 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) brelse(bh); } + /* log error, force error return */ + if (primary_failed) { + btrfs_err(device->fs_info, "error writing primary super block to device %llu", + device->devid); + return -1; + } + return errors < i ? 0 : -1; } -- cgit From f7b885befd05fa4f546cdc3e6c9a3b4a30484cd1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 13 Feb 2018 17:50:42 +0800 Subject: btrfs: manage thread_pool mount option as %u The mount option thread_pool is always unsigned. Manage it that way all around. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/super.c | 13 ++++++------- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c96275c17e6..407f435245f5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -935,7 +935,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; - int thread_pool_size; + u32 thread_pool_size; struct kobject *space_info_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9f7ed4390b33..47624407bb94 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2183,7 +2183,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { - int max_active = fs_info->thread_pool_size; + u32 max_active = fs_info->thread_pool_size; unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; fs_info->workers = @@ -2404,7 +2404,7 @@ int open_ctree(struct super_block *sb, int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; - int max_active; + u32 max_active; int clear_free_space_tree = 0; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4b817947e00f..790f3ab600cd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -345,7 +345,7 @@ static const match_table_t tokens = { {Opt_barrier, "barrier"}, {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, - {Opt_thread_pool, "thread_pool=%d"}, + {Opt_thread_pool, "thread_pool=%u"}, {Opt_compress, "compress"}, {Opt_compress_type, "compress=%s"}, {Opt_compress_force, "compress-force"}, @@ -594,12 +594,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ret = match_int(&args[0], &intarg); if (ret) { goto out; - } else if (intarg > 0) { - info->thread_pool_size = intarg; - } else { + } else if (intarg == 0) { ret = -EINVAL; goto out; } + info->thread_pool_size = intarg; break; case Opt_max_inline: num = match_strdup(&args[0]); @@ -1284,7 +1283,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) - seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); + seq_printf(seq, ",thread_pool=%u", info->thread_pool_size); if (btrfs_test_opt(info, COMPRESS)) { compress_type = btrfs_compress_type2str(info->compress_type); if (btrfs_test_opt(info, FORCE_COMPRESS)) @@ -1690,7 +1689,7 @@ out: } static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, - int new_pool_size, int old_pool_size) + u32 new_pool_size, u32 old_pool_size) { if (new_pool_size == old_pool_size) return; @@ -1758,7 +1757,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) unsigned long old_opts = fs_info->mount_opt; unsigned long old_compress_type = fs_info->compress_type; u64 old_max_inline = fs_info->max_inline; - int old_thread_pool_size = fs_info->thread_pool_size; + u32 old_thread_pool_size = fs_info->thread_pool_size; unsigned int old_metadata_ratio = fs_info->metadata_ratio; int ret; -- cgit From bf6d7d4900c0d0c15dcd1baa1b65e6519bf4abef Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 8 Feb 2018 18:25:17 +0200 Subject: btrfs: Remove invalid null checks from btrfs_cleanup_dirty_bgs list_first_entry is essentially a wrapper over cotnainer_of. The latter can never return null even if it's working on inconsistent list since it will either crash or return some offset in the wrong struct. Additionally, for the dirty_bgs list the iteration is done under dirty_bgs_lock which ensures consistency of the list. Signed-off-by: Nikolay Borisov Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 47624407bb94..dcfdb1f17f1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4327,11 +4327,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, cache = list_first_entry(&cur_trans->dirty_bgs, struct btrfs_block_group_cache, dirty_list); - if (!cache) { - btrfs_err(fs_info, "orphan block group dirty_bgs list"); - spin_unlock(&cur_trans->dirty_bgs_lock); - return; - } if (!list_empty(&cache->io_list)) { spin_unlock(&cur_trans->dirty_bgs_lock); @@ -4355,10 +4350,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, cache = list_first_entry(&cur_trans->io_bgs, struct btrfs_block_group_cache, io_list); - if (!cache) { - btrfs_err(fs_info, "orphan block group on io_bgs list"); - return; - } list_del_init(&cache->io_list); spin_lock(&cache->lock); -- cgit From 45ae2c1841c31c90077cf427c09ea0e83e381026 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 8 Feb 2018 18:25:18 +0200 Subject: btrfs: Document consistency of transaction->io_bgs list The reason why io_bgs can be modified without holding any lock is non-obvious. Document it and reference that documentation from the respective call sites. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/extent-tree.c | 9 +++++++-- fs/btrfs/transaction.h | 16 ++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dcfdb1f17f1b..08e33f316a86 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4346,6 +4346,10 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, } spin_unlock(&cur_trans->dirty_bgs_lock); + /* + * Refer to the definition of io_bgs member for details why it's safe + * to use it without any locking + */ while (!list_empty(&cur_trans->io_bgs)) { cache = list_first_entry(&cur_trans->io_bgs, struct btrfs_block_group_cache, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b4d6789cc66..9a677860f64d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3741,8 +3741,9 @@ again: should_put = 0; /* - * the cache_write_mutex is protecting - * the io_list + * The cache_write_mutex is protecting the + * io_list, also refer to the definition of + * btrfs_transaction::io_bgs for more details */ list_add_tail(&cache->io_list, io); } else { @@ -3934,6 +3935,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, } spin_unlock(&cur_trans->dirty_bgs_lock); + /* + * Refer to the definition of io_bgs member for details why it's safe + * to use it without any locking + */ while (!list_empty(io)) { cache = list_first_entry(io, struct btrfs_block_group_cache, io_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 817fd7c9836b..2762c8d6191c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -69,6 +69,22 @@ struct btrfs_transaction { struct list_head pending_chunks; struct list_head switch_commits; struct list_head dirty_bgs; + + /* + * There is no explicit lock which protects io_bgs, rather its + * consistency is implied by the fact that all the sites which modify + * it do so under some form of transaction critical section, namely: + * + * - btrfs_start_dirty_block_groups - This function can only ever be + * run by one of the transaction committers. Refer to + * BTRFS_TRANS_DIRTY_BG_RUN usage in btrfs_commit_transaction + * + * - btrfs_write_dirty_blockgroups - this is called by + * commit_cowonly_roots from transaction critical section + * (TRANS_STATE_COMMIT_DOING) + * + * - btrfs_cleanup_dirty_bgs - called on transaction abort + */ struct list_head io_bgs; struct list_head dropped_roots; -- cgit From 2afb9653bf56a32ac4955527357428a8c4fdd5b8 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 15 Feb 2018 11:36:07 +0800 Subject: btrfs: remove unused function btrfs_async_submit_limit() Commit [1] removed the need to use btrfs_async_submit_limit(), so delete it. [1] commit 736cd52e0c720103f52ab9da47b6cc3af6b083f6 Btrfs: remove nr_async_submits and async_submit_draining Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 -------- fs/btrfs/disk-io.h | 1 - 2 files changed, 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e33f316a86..b23961a4af52 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -710,14 +710,6 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) -{ - unsigned long limit = min_t(unsigned long, - info->thread_pool_size, - info->fs_devices->open_devices); - return 256 * limit; -} - static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 301151a50ac1..e2ac6a14150a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -133,7 +133,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, u64 bio_offset, void *private_data, extent_submit_bio_hook_t *submit_bio_start, extent_submit_bio_hook_t *submit_bio_done); -unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); int btrfs_write_tree_block(struct extent_buffer *buf); void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, -- cgit From bc5511d0eda1c2534aa3cedb485eae1fc354f2dc Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 23 Jan 2018 14:46:53 +0200 Subject: btrfs: Use schedule_timeout_interruptible Instead of manually fiddling with the state of the task (RUNNING->INTERRUPTIBLE->RUNNING) again just use schedule_timeout_interruptible which adjusts the task state as needed. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b23961a4af52..fa80de56340f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1800,12 +1800,10 @@ sleep: if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))) btrfs_cleanup_transaction(fs_info); - set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); + schedule_timeout_interruptible(delay); } while (!kthread_should_stop()); return 0; } -- cgit From 2f659546c9048931c2b8e146824a892b74a8e33c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 25 Jan 2018 14:56:18 +0800 Subject: btrfs: tree-checker: Replace root parameter with fs_info When inspecting the error message with real corruption, the "root=%llu" always shows "1" (root tree), instead of the correct owner. The problem is that we are getting @root from page->mapping->host, which points the same btree inode, so we will always get the same root. This makes the root owner output meaningless, and harder to port tree-checker to btrfs-progs. So get rid of the false and meaningless @root parameter and replace it with @fs_info. To get the owner, we can only rely on btrfs_header_owner() now. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +- fs/btrfs/tree-checker.c | 147 ++++++++++++++++++++++++------------------------ fs/btrfs/tree-checker.h | 7 ++- 3 files changed, 82 insertions(+), 78 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fa80de56340f..9d2c932b012e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -602,12 +602,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf_full(root, eb)) { + if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } - if (found_level > 0 && btrfs_check_node(root, eb)) + if (found_level > 0 && btrfs_check_node(fs_info, eb)) ret = -EIO; if (!ret) @@ -3854,7 +3854,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) * So here we should only check item pointers, not item data. */ if (btrfs_header_level(buf) == 0 && - btrfs_check_leaf_relaxed(root, buf)) { + btrfs_check_leaf_relaxed(fs_info, buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index c3c8d48f6618..a5244f98f3b4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -53,7 +53,7 @@ * Allows callers to customize the output. */ __printf(4, 5) -static void generic_err(const struct btrfs_root *root, +static void generic_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) { @@ -65,10 +65,10 @@ static void generic_err(const struct btrfs_root *root, vaf.fmt = fmt; vaf.va = &args; - btrfs_crit(root->fs_info, + btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d, %pV", btrfs_header_level(eb) == 0 ? "leaf" : "node", - root->objectid, btrfs_header_bytenr(eb), slot, &vaf); + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf); va_end(args); } @@ -77,7 +77,7 @@ static void generic_err(const struct btrfs_root *root, * offset has its own meaning. */ __printf(4, 5) -static void file_extent_err(const struct btrfs_root *root, +static void file_extent_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) { @@ -91,10 +91,11 @@ static void file_extent_err(const struct btrfs_root *root, vaf.fmt = fmt; vaf.va = &args; - btrfs_crit(root->fs_info, + btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV", - btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, - btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf); + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, key.offset, &vaf); va_end(args); } @@ -102,26 +103,26 @@ static void file_extent_err(const struct btrfs_root *root, * Return 0 if the btrfs_file_extent_##name is aligned to @alignment * Else return 1 */ -#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment) \ +#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \ ({ \ if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \ - file_extent_err((root), (leaf), (slot), \ + file_extent_err((fs_info), (leaf), (slot), \ "invalid %s for file extent, have %llu, should be aligned to %u", \ (#name), btrfs_file_extent_##name((leaf), (fi)), \ (alignment)); \ (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ }) -static int check_extent_data_item(struct btrfs_root *root, +static int check_extent_data_item(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_file_extent_item *fi; - u32 sectorsize = root->fs_info->sectorsize; + u32 sectorsize = fs_info->sectorsize; u32 item_size = btrfs_item_size_nr(leaf, slot); if (!IS_ALIGNED(key->offset, sectorsize)) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "unaligned file_offset for file extent, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; @@ -130,7 +131,7 @@ static int check_extent_data_item(struct btrfs_root *root, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid type for file extent, have %u expect range [0, %u]", btrfs_file_extent_type(leaf, fi), BTRFS_FILE_EXTENT_TYPES); @@ -142,14 +143,14 @@ static int check_extent_data_item(struct btrfs_root *root, * and must be caught in open_ctree(). */ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid compression for file extent, have %u expect range [0, %u]", btrfs_file_extent_compression(leaf, fi), BTRFS_COMPRESS_TYPES); return -EUCLEAN; } if (btrfs_file_extent_encryption(leaf, fi)) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid encryption for file extent, have %u expect 0", btrfs_file_extent_encryption(leaf, fi)); return -EUCLEAN; @@ -157,7 +158,7 @@ static int check_extent_data_item(struct btrfs_root *root, if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { /* Inline extent must have 0 as key offset */ if (key->offset) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid file_offset for inline file extent, have %llu expect 0", key->offset); return -EUCLEAN; @@ -171,7 +172,7 @@ static int check_extent_data_item(struct btrfs_root *root, /* Uncompressed inline extent size must match item size */ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi)) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START + btrfs_file_extent_ram_bytes(leaf, fi)); @@ -182,40 +183,41 @@ static int check_extent_data_item(struct btrfs_root *root, /* Regular or preallocated extent has fixed item size */ if (item_size != sizeof(*fi)) { - file_extent_err(root, leaf, slot, + file_extent_err(fs_info, leaf, slot, "invalid item size for reg/prealloc file extent, have %u expect %zu", item_size, sizeof(*fi)); return -EUCLEAN; } - if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) || - CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) || - CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) || - CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) || - CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize)) + if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) || + CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) || + CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) || + CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) || + CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize)) return -EUCLEAN; return 0; } -static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, - struct btrfs_key *key, int slot) +static int check_csum_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, struct btrfs_key *key, + int slot) { - u32 sectorsize = root->fs_info->sectorsize; - u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); + u32 sectorsize = fs_info->sectorsize; + u32 csumsize = btrfs_super_csum_size(fs_info->super_copy); if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "invalid key objectid for csum item, have %llu expect %llu", key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); return -EUCLEAN; } if (!IS_ALIGNED(key->offset, sectorsize)) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "unaligned key offset for csum item, have %llu should be aligned to %u", key->offset, sectorsize); return -EUCLEAN; } if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "unaligned item size for csum item, have %u should be aligned to %u", btrfs_item_size_nr(leaf, slot), csumsize); return -EUCLEAN; @@ -228,7 +230,7 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, * which represents inode number */ __printf(4, 5) -static void dir_item_err(const struct btrfs_root *root, +static void dir_item_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) { @@ -242,14 +244,15 @@ static void dir_item_err(const struct btrfs_root *root, vaf.fmt = fmt; vaf.va = &args; - btrfs_crit(root->fs_info, + btrfs_crit(fs_info, "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", - btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, - btrfs_header_bytenr(eb), slot, key.objectid, &vaf); + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, &vaf); va_end(args); } -static int check_dir_item(struct btrfs_root *root, +static int check_dir_item(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, struct btrfs_key *key, int slot) { @@ -268,7 +271,7 @@ static int check_dir_item(struct btrfs_root *root, /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; @@ -277,7 +280,7 @@ static int check_dir_item(struct btrfs_root *root, /* dir type check */ dir_type = btrfs_dir_type(leaf, di); if (dir_type >= BTRFS_FT_MAX) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "invalid dir item type, have %u expect [0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; @@ -285,14 +288,14 @@ static int check_dir_item(struct btrfs_root *root, if (key->type == BTRFS_XATTR_ITEM_KEY && dir_type != BTRFS_FT_XATTR) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "invalid dir item type for XATTR key, have %u expect %u", dir_type, BTRFS_FT_XATTR); return -EUCLEAN; } if (dir_type == BTRFS_FT_XATTR && key->type != BTRFS_XATTR_ITEM_KEY) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "xattr dir type found for non-XATTR key"); return -EUCLEAN; } @@ -305,21 +308,21 @@ static int check_dir_item(struct btrfs_root *root, name_len = btrfs_dir_name_len(leaf, di); data_len = btrfs_dir_data_len(leaf, di); if (name_len > max_name_len) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "dir item name len too long, have %u max %u", name_len, max_name_len); return -EUCLEAN; } - if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) { - dir_item_err(root, leaf, slot, + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) { + dir_item_err(fs_info, leaf, slot, "dir item name and data len too long, have %u max %u", name_len + data_len, - BTRFS_MAX_XATTR_SIZE(root->fs_info)); + BTRFS_MAX_XATTR_SIZE(fs_info)); return -EUCLEAN; } if (data_len && dir_type != BTRFS_FT_XATTR) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "dir item with invalid data len, have %u expect 0", data_len); return -EUCLEAN; @@ -329,7 +332,7 @@ static int check_dir_item(struct btrfs_root *root, /* header and name/data should not cross item boundary */ if (cur + total_size > item_size) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "dir item data crosses item boundary, have %u boundary %u", cur + total_size, item_size); return -EUCLEAN; @@ -347,7 +350,7 @@ static int check_dir_item(struct btrfs_root *root, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); if (key->offset != name_hash) { - dir_item_err(root, leaf, slot, + dir_item_err(fs_info, leaf, slot, "name hash mismatch with key, have 0x%016x expect 0x%016llx", name_hash, key->offset); return -EUCLEAN; @@ -362,7 +365,7 @@ static int check_dir_item(struct btrfs_root *root, /* * Common point to switch the item-specific validation. */ -static int check_leaf_item(struct btrfs_root *root, +static int check_leaf_item(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, struct btrfs_key *key, int slot) { @@ -370,24 +373,23 @@ static int check_leaf_item(struct btrfs_root *root, switch (key->type) { case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(root, leaf, key, slot); + ret = check_extent_data_item(fs_info, leaf, key, slot); break; case BTRFS_EXTENT_CSUM_KEY: - ret = check_csum_item(root, leaf, key, slot); + ret = check_csum_item(fs_info, leaf, key, slot); break; case BTRFS_DIR_ITEM_KEY: case BTRFS_DIR_INDEX_KEY: case BTRFS_XATTR_ITEM_KEY: - ret = check_dir_item(root, leaf, key, slot); + ret = check_dir_item(fs_info, leaf, key, slot); break; } return ret; } -static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, +static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, bool check_item_data) { - struct btrfs_fs_info *fs_info = root->fs_info; /* No valid key type is 0, so all key should be larger than this key */ struct btrfs_key prev_key = {0, 0, 0}; struct btrfs_key key; @@ -420,7 +422,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, eb = btrfs_root_node(check_root); /* if leaf is the root, then it's fine */ if (leaf != eb) { - generic_err(check_root, leaf, 0, + generic_err(fs_info, leaf, 0, "invalid nritems, have %u should not be 0 for non-root leaf", nritems); free_extent_buffer(eb); @@ -453,7 +455,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, /* Make sure the keys are in the right order */ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", prev_key.objectid, prev_key.type, prev_key.offset, key.objectid, key.type, @@ -472,7 +474,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, item_end_expected = btrfs_item_offset_nr(leaf, slot - 1); if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "unexpected item end, have %u expect %u", btrfs_item_end_nr(leaf, slot), item_end_expected); @@ -486,7 +488,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, */ if (btrfs_item_end_nr(leaf, slot) > BTRFS_LEAF_DATA_SIZE(fs_info)) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "slot end outside of leaf, have %u expect range [0, %u]", btrfs_item_end_nr(leaf, slot), BTRFS_LEAF_DATA_SIZE(fs_info)); @@ -496,7 +498,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, /* Also check if the item pointer overlaps with btrfs item. */ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > btrfs_item_ptr_offset(leaf, slot)) { - generic_err(root, leaf, slot, + generic_err(fs_info, leaf, slot, "slot overlaps with its data, item end %lu data start %lu", btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item), @@ -509,7 +511,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, * Check if the item size and content meet other * criteria */ - ret = check_leaf_item(root, leaf, &key, slot); + ret = check_leaf_item(fs_info, leaf, &key, slot); if (ret < 0) return ret; } @@ -522,18 +524,19 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf, return 0; } -int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf) +int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf) { - return check_leaf(root, leaf, true); + return check_leaf(fs_info, leaf, true); } -int btrfs_check_leaf_relaxed(struct btrfs_root *root, +int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf) { - return check_leaf(root, leaf, false); + return check_leaf(fs_info, leaf, false); } -int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) +int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) { unsigned long nr = btrfs_header_nritems(node); struct btrfs_key key, next_key; @@ -541,12 +544,12 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) u64 bytenr; int ret = 0; - if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { - btrfs_crit(root->fs_info, + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) { + btrfs_crit(fs_info, "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", - root->objectid, node->start, + btrfs_header_owner(node), node->start, nr == 0 ? "small" : "large", nr, - BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)); + BTRFS_NODEPTRS_PER_BLOCK(fs_info)); return -EUCLEAN; } @@ -556,21 +559,21 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) btrfs_node_key_to_cpu(node, &next_key, slot + 1); if (!bytenr) { - generic_err(root, node, slot, + generic_err(fs_info, node, slot, "invalid NULL node pointer"); ret = -EUCLEAN; goto out; } - if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) { - generic_err(root, node, slot, + if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) { + generic_err(fs_info, node, slot, "unaligned pointer, have %llu should be aligned to %u", - bytenr, root->fs_info->sectorsize); + bytenr, fs_info->sectorsize); ret = -EUCLEAN; goto out; } if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - generic_err(root, node, slot, + generic_err(fs_info, node, slot, "bad key order, current (%llu %u %llu) next (%llu %u %llu)", key.objectid, key.type, key.offset, next_key.objectid, next_key.type, diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 3d53e8d6fda0..aba542755710 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -25,14 +25,15 @@ * Will check not only the item pointers, but also every possible member * in item data. */ -int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf); +int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf); /* * Less strict leaf checker. * Will only check item pointers, not reading item data. */ -int btrfs_check_leaf_relaxed(struct btrfs_root *root, +int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf); -int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); +int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); #endif -- cgit From 9678c54388b6a6b309ff7ee5c8d23fa9eba7c06f Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Jan 2018 11:45:05 +0200 Subject: btrfs: Remove custom crc32c init code The custom crc32 init code was introduced in 14a958e678cd ("Btrfs: fix btrfs boot when compiled as built-in") to enable using btrfs as a built-in. However, later as pointed out by 60efa5eb2e88 ("Btrfs: use late_initcall instead of module_init") this wasn't enough and finally btrfs was switched to late_initcall which comes after the generic crc32c implementation is initiliased. The latter commit superseeded the former. Now that we don't have to maintain our own code let's just remove it and switch to using the generic implementation. Despite touching a lot of files the patch is really simple. Here is the gist of the changes: 1. Select LIBCRC32C rather than the low-level modules. 2. s/btrfs_crc32c/crc32c/g 3. replace hash.h with linux/crc32c.h 4. Move the btrfs namehash funcs to ctree.h and change the tree accordingly. I've tested this with btrfs being both a module and a built-in and xfstest doesn't complain. Does seem to fix the longstanding problem of not automatically selectiong the crc32c module when btrfs is used. Possibly there is a workaround in dracut. The modinfo confirms that now all the module dependencies are there: before: depends: zstd_compress,zstd_decompress,raid6_pq,xor,zlib_deflate after: depends: libcrc32c,zstd_compress,zstd_decompress,raid6_pq,xor,zlib_deflate Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ add more info to changelog from mails ] Signed-off-by: David Sterba --- fs/btrfs/Kconfig | 3 +-- fs/btrfs/Makefile | 2 +- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/ctree.h | 16 ++++++++++++++ fs/btrfs/dir-item.c | 1 - fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent-tree.c | 10 ++++----- fs/btrfs/hash.c | 54 ---------------------------------------------- fs/btrfs/hash.h | 43 ------------------------------------ fs/btrfs/inode-item.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/props.c | 2 +- fs/btrfs/send.c | 4 ++-- fs/btrfs/super.c | 14 ++++-------- fs/btrfs/tree-checker.c | 1 - fs/btrfs/tree-log.c | 2 +- 16 files changed, 35 insertions(+), 127 deletions(-) delete mode 100644 fs/btrfs/hash.c delete mode 100644 fs/btrfs/hash.h (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 273351ee4c46..167e5dc7eadd 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -1,7 +1,6 @@ config BTRFS_FS tristate "Btrfs filesystem support" - select CRYPTO - select CRYPTO_CRC32C + select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4373628eb4..ca693dd554e9 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o + uuid-tree.o props.o free-space-tree.o tree-checker.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7d51b5a5b505..3baebbc021c5 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -96,9 +96,9 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" -#include "hash.h" #include "transaction.h" #include "extent_io.h" #include "volumes.h" @@ -1736,7 +1736,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, size_t sublen = i ? PAGE_SIZE : (PAGE_SIZE - BTRFS_CSUM_SIZE); - crc = btrfs_crc32c(crc, data, sublen); + crc = crc32c(crc, data, sublen); } btrfs_csum_final(crc, csum); if (memcmp(csum, h->csum, state->csum_size)) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a4877b6959e3..92b9db7186bb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,6 +40,7 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -98,6 +99,7 @@ static const int btrfs_csum_sizes[] = { 4 }; #define BTRFS_MAX_EXTENT_SIZE SZ_128M + /* * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size */ @@ -2553,6 +2555,20 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \ btrfs_item_offset_nr(leaf, slot))) +static inline u64 btrfs_name_hash(const char *name, int len) +{ + return crc32c((u32)~1, name, len); +} + +/* + * Figure the key offset of an extended inode ref + */ +static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, + int len) +{ + return (u64) crc32c(parent_objectid, name, len); +} + static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) { return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index cbe421605cd5..29e967b2c667 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,7 +18,6 @@ #include "ctree.h" #include "disk-io.h" -#include "hash.h" #include "transaction.h" /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d2c932b012e..c10c84640eee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,10 +31,10 @@ #include #include #include +#include #include #include "ctree.h" #include "disk-io.h" -#include "hash.h" #include "transaction.h" #include "btrfs_inode.h" #include "volumes.h" @@ -270,7 +270,7 @@ out: u32 btrfs_csum_data(const char *data, u32 seed, size_t len) { - return btrfs_crc32c(seed, data, len); + return crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, u8 *result) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 22ac82198a54..2760292e1175 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -27,7 +27,7 @@ #include #include #include -#include "hash.h" +#include #include "tree-log.h" #include "disk-io.h" #include "print-tree.h" @@ -1203,11 +1203,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) __le64 lenum; lenum = cpu_to_le64(root_objectid); - high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(offset); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); return ((u64)high_crc << 31) ^ (u64)low_crc; } @@ -5944,7 +5944,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, */ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), num_bytes, 1); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c deleted file mode 100644 index baacc1866861..000000000000 --- a/fs/btrfs/hash.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2014 Filipe David Borba Manana - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -#include -#include -#include "hash.h" - -static struct crypto_shash *tfm; - -int __init btrfs_hash_init(void) -{ - tfm = crypto_alloc_shash("crc32c", 0, 0); - - return PTR_ERR_OR_ZERO(tfm); -} - -const char* btrfs_crc32c_impl(void) -{ - return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)); -} - -void btrfs_hash_exit(void) -{ - crypto_free_shash(tfm); -} - -u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, tfm); - u32 *ctx = (u32 *)shash_desc_ctx(shash); - u32 retval; - int err; - - shash->tfm = tfm; - shash->flags = 0; - *ctx = crc; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); - - retval = *ctx; - barrier_data(ctx); - return retval; -} diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h deleted file mode 100644 index c3a2ec554361..000000000000 --- a/fs/btrfs/hash.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __HASH__ -#define __HASH__ - -int __init btrfs_hash_init(void); - -void btrfs_hash_exit(void); -const char* btrfs_crc32c_impl(void); - -u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length); - -static inline u64 btrfs_name_hash(const char *name, int len) -{ - return btrfs_crc32c((u32)~1, name, len); -} - -/* - * Figure the key offset of an extended inode ref - */ -static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, - int len) -{ - return (u64) btrfs_crc32c(parent_objectid, name, len); -} - -#endif diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 65e1a76bf755..1d5631ef2738 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -18,7 +18,6 @@ #include "ctree.h" #include "disk-io.h" -#include "hash.h" #include "transaction.h" #include "print-tree.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b55b47f493e9..6504e63b2317 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -58,7 +58,6 @@ #include "free-space-cache.h" #include "inode-map.h" #include "backref.h" -#include "hash.h" #include "props.h" #include "qgroup.h" #include "dedupe.h" diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index b30a056963ab..e4ac24175524 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -19,8 +19,8 @@ #include #include "props.h" #include "btrfs_inode.h" -#include "hash.h" #include "transaction.h" +#include "ctree.h" #include "xattr.h" #include "compression.h" diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 484e2af793de..b0c5d710183e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -27,10 +27,10 @@ #include #include #include +#include #include "send.h" #include "backref.h" -#include "hash.h" #include "locking.h" #include "disk-io.h" #include "btrfs_inode.h" @@ -695,7 +695,7 @@ static int send_cmd(struct send_ctx *sctx) hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); hdr->crc = 0; - crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); hdr->crc = cpu_to_le32(crc); ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 540c18511e7a..5d752f791950 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "delayed-inode.h" #include "ctree.h" @@ -48,7 +49,6 @@ #include "transaction.h" #include "btrfs_inode.h" #include "print-tree.h" -#include "hash.h" #include "props.h" #include "xattr.h" #include "volumes.h" @@ -2357,22 +2357,18 @@ static void __init btrfs_print_mod_info(void) ", ref-verify=on" #endif "\n", - btrfs_crc32c_impl()); + crc32c_impl()); } static int __init init_btrfs_fs(void) { int err; - err = btrfs_hash_init(); - if (err) - return err; - btrfs_props_init(); err = btrfs_init_sysfs(); if (err) - goto free_hash; + return err; btrfs_init_compress(); @@ -2453,8 +2449,7 @@ free_cachep: free_compress: btrfs_exit_compress(); btrfs_exit_sysfs(); -free_hash: - btrfs_hash_exit(); + return err; } @@ -2474,7 +2469,6 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); btrfs_exit_compress(); - btrfs_hash_exit(); } late_initcall(init_btrfs_fs); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index a5244f98f3b4..e96cfd93ae3f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -30,7 +30,6 @@ #include "tree-checker.h" #include "disk-io.h" #include "compression.h" -#include "hash.h" /* * Error message should follow the following format: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fac5fd1cc786..bbd8a40b2006 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -21,12 +21,12 @@ #include #include #include +#include "ctree.h" #include "tree-log.h" #include "disk-io.h" #include "locking.h" #include "print-tree.h" #include "backref.h" -#include "hash.h" #include "compression.h" #include "qgroup.h" #include "inode-map.h" -- cgit From e67c718b5b9a306bde7e966be7b4ca48fa063d73 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 19 Feb 2018 17:24:18 +0100 Subject: btrfs: add more __cold annotations The __cold functions are placed to a special section, as they're expected to be called rarely. This could help i-cache prefetches or help compiler to decide which branches are more/less likely to be taken without any other annotations needed. Though we can't add more __exit annotations, it's still possible to add __cold (that's also added with __exit). That way the following function categories are tagged: - printf wrappers, error messages - exit helpers Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- fs/btrfs/backref.h | 2 +- fs/btrfs/compression.c | 2 +- fs/btrfs/compression.h | 2 +- fs/btrfs/ctree.h | 9 +++++---- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/delayed-inode.h | 2 +- fs/btrfs/delayed-ref.c | 2 +- fs/btrfs/delayed-ref.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/extent_map.c | 2 +- fs/btrfs/extent_map.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/ordered-data.c | 2 +- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/send.c | 1 + fs/btrfs/super.c | 2 +- fs/btrfs/sysfs.c | 2 +- fs/btrfs/tree-checker.c | 3 +++ 23 files changed, 29 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 26484648d090..4a33448cbb01 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -170,7 +170,7 @@ int __init btrfs_prelim_ref_init(void) return 0; } -void btrfs_prelim_ref_exit(void) +void __cold btrfs_prelim_ref_exit(void) { kmem_cache_destroy(btrfs_prelim_ref_cache); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 0c2fab8514ff..0a30028d5196 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -73,7 +73,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr); int __init btrfs_prelim_ref_init(void); -void btrfs_prelim_ref_exit(void); +void __cold btrfs_prelim_ref_exit(void); struct prelim_ref { struct rb_node rbnode; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 07d049c0c20f..562c3e633403 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1133,7 +1133,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, return ret; } -void btrfs_exit_compress(void) +void __cold btrfs_exit_compress(void) { free_workspaces(); } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 677fa4aa0bd7..ce796557a918 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -76,7 +76,7 @@ struct compressed_bio { }; void __init btrfs_init_compress(void); -void btrfs_exit_compress(void); +void __cold btrfs_exit_compress(void); int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, u64 start, struct page **pages, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 92b9db7186bb..d6a2fc311187 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3204,7 +3204,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); int __init btrfs_init_cachep(void); -void btrfs_destroy_cachep(void); +void __cold btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *was_new); @@ -3255,7 +3255,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, /* file.c */ int __init btrfs_auto_defrag_init(void); -void btrfs_auto_defrag_exit(void); +void __cold btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); @@ -3290,7 +3290,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, /* sysfs.c */ int __init btrfs_init_sysfs(void); -void btrfs_exit_sysfs(void); +void __cold btrfs_exit_sysfs(void); int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info); void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); @@ -3302,13 +3302,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, unsigned long new_flags); int btrfs_sync_fs(struct super_block *sb, int wait); -static inline __printf(2, 3) +static inline __printf(2, 3) __cold void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { } #ifdef CONFIG_PRINTK __printf(2, 3) +__cold void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); #else #define btrfs_printk(fs_info, fmt, args...) \ diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 09939fc37f2a..d06bef16ebd5 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -42,7 +42,7 @@ int __init btrfs_delayed_inode_init(void) return 0; } -void btrfs_delayed_inode_exit(void) +void __cold btrfs_delayed_inode_exit(void) { kmem_cache_destroy(delayed_node_cache); } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index ae893d85224f..100a91e26b55 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -149,7 +149,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, /* for init */ int __init btrfs_delayed_inode_init(void); -void btrfs_delayed_inode_exit(void); +void __cold btrfs_delayed_inode_exit(void); /* for debugging */ void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 7ab5e0128f0c..03bdf355107a 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -930,7 +930,7 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(&delayed_refs->href_root, bytenr, 0); } -void btrfs_delayed_ref_exit(void) +void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index c4f625e5a691..9e3e5aff0937 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -204,7 +204,7 @@ extern struct kmem_cache *btrfs_delayed_data_ref_cachep; extern struct kmem_cache *btrfs_delayed_extent_op_cachep; int __init btrfs_delayed_ref_init(void); -void btrfs_delayed_ref_exit(void); +void __cold btrfs_delayed_ref_exit(void); static inline struct btrfs_delayed_extent_op * btrfs_alloc_delayed_extent_op(void) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c10c84640eee..798e602c1834 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -110,7 +110,7 @@ int __init btrfs_end_io_wq_init(void) return 0; } -void btrfs_end_io_wq_exit(void) +void __cold btrfs_end_io_wq_exit(void) { kmem_cache_destroy(btrfs_end_io_wq_cache); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e2ac6a14150a..aaf99529883d 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -153,7 +153,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode, int create); int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int __init btrfs_end_io_wq_init(void); -void btrfs_end_io_wq_exit(void); +void __cold btrfs_end_io_wq_exit(void); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4e73705b405e..da46e9372262 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -187,7 +187,7 @@ free_state_cache: return -ENOMEM; } -void extent_io_exit(void) +void __cold extent_io_exit(void) { btrfs_leak_debug_check(); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index da9be2fb0502..e359c5d4305c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -286,7 +286,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); -void extent_io_exit(void); +void __cold extent_io_exit(void); u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b8ead8dc2ebe..53a0633c6ef7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -19,7 +19,7 @@ int __init extent_map_init(void) return 0; } -void extent_map_exit(void) +void __cold extent_map_exit(void) { kmem_cache_destroy(extent_map_cache); } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index b29f77bc0732..f6f8ba114977 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -86,7 +86,7 @@ void replace_extent_mapping(struct extent_map_tree *tree, struct extent_map *alloc_extent_map(void); void free_extent_map(struct extent_map *em); int __init extent_map_init(void); -void extent_map_exit(void); +void __cold extent_map_exit(void); int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em); struct extent_map *search_extent_mapping(struct extent_map_tree *tree, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 41ab9073d1d4..a335e2e6c84d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3378,7 +3378,7 @@ const struct file_operations btrfs_file_operations = { .dedupe_file_range = btrfs_dedupe_file_range, }; -void btrfs_auto_defrag_exit(void) +void __cold btrfs_auto_defrag_exit(void) { kmem_cache_destroy(btrfs_inode_defrag_cachep); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 491a7397f6fa..bb5de52cbc09 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9478,7 +9478,7 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -void btrfs_destroy_cachep(void) +void __cold btrfs_destroy_cachep(void) { /* * Make sure all delayed rcu free inodes are flushed before we diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 5b311aeddcc8..9be98e42cfb6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1154,7 +1154,7 @@ int __init ordered_data_init(void) return 0; } -void ordered_data_exit(void) +void __cold ordered_data_exit(void) { kmem_cache_destroy(btrfs_ordered_extent_cache); } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c53e2cfb72d9..4a1672a13ba6 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -217,5 +217,5 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, struct btrfs_root *log, u64 transid); void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); int __init ordered_data_init(void); -void ordered_data_exit(void); +void __cold ordered_data_exit(void); #endif diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b0c5d710183e..085542832b9a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -270,6 +270,7 @@ struct name_cache_entry { char name[]; }; +__cold static void inconsistent_snapshot_error(struct send_ctx *sctx, enum btrfs_compare_tree_result result, const char *what) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 07bc2bfbdb96..1dd2e785918c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2333,7 +2333,7 @@ static int __init btrfs_interface_init(void) return misc_register(&btrfs_misc); } -static void btrfs_interface_exit(void) +static __cold void btrfs_interface_exit(void) { misc_deregister(&btrfs_misc); } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index a8bafed931f4..6af7b58e1a90 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -923,7 +923,7 @@ out1: return ret; } -void btrfs_exit_sysfs(void) +void __cold btrfs_exit_sysfs(void) { sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); kset_unregister(btrfs_kset); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e96cfd93ae3f..8871286c1a91 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -52,6 +52,7 @@ * Allows callers to customize the output. */ __printf(4, 5) +__cold static void generic_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) @@ -76,6 +77,7 @@ static void generic_err(const struct btrfs_fs_info *fs_info, * offset has its own meaning. */ __printf(4, 5) +__cold static void file_extent_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) @@ -229,6 +231,7 @@ static int check_csum_item(struct btrfs_fs_info *fs_info, * which represents inode number */ __printf(4, 5) +__cold static void dir_item_err(const struct btrfs_fs_info *fs_info, const struct extent_buffer *eb, int slot, const char *fmt, ...) -- cgit From 9b99b11564446600f3a3ce394a841d525b7993e7 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 27 Feb 2018 12:41:59 +0800 Subject: btrfs: rename btrfs_close_extra_device to btrfs_free_extra_devids This function btrfs_close_extra_devices() is about freeing extra devids which once it may have belonged to this filesystem. So rename it and add the comment. The _devid suffix is appropriate as this function won't handle devices which are outside of the filesytem being mounted. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/volumes.c | 6 +++++- fs/btrfs/volumes.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 798e602c1834..bbbcbf87ac93 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2758,10 +2758,10 @@ int open_ctree(struct super_block *sb, } /* - * keep the device that is marked to be the target device for the - * dev_replace procedure + * Keep the devid that is marked to be the target device for the + * device replace procedure */ - btrfs_close_extra_devices(fs_devices, 0); + btrfs_free_extra_devids(fs_devices, 0); if (!fs_devices->latest_bdev) { btrfs_err(fs_info, "failed to read devices"); @@ -2824,7 +2824,7 @@ retry_root_backup: goto fail_block_groups; } - btrfs_close_extra_devices(fs_devices, 1); + btrfs_free_extra_devids(fs_devices, 1); ret = btrfs_sysfs_add_fsid(fs_devices, NULL); if (ret) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b580ef08c368..1622d3b73771 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -896,7 +896,11 @@ error: return ERR_PTR(-ENOMEM); } -void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step) +/* + * After we have read the system tree and know devids belonging to + * this filesystem, remove the device which does not belong there. + */ +void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step) { struct btrfs_device *device, *next; struct btrfs_device *latest_dev = NULL; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d28f5745fee2..d1fcaea9fef5 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -422,7 +422,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct btrfs_fs_devices **fs_devices_ret); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); -void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); +void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, struct btrfs_device *device, struct btrfs_device *this_dev); int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, -- cgit From a758781d4b76c38374f155e2f2cf902e13b9e50e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 03:05:23 +0200 Subject: btrfs: separate types for submit_bio_start and submit_bio_done The callbacks make use of different parameters that are passed to the other type unnecessarily. This patch adds separate types for each and the unused parameters will be removed. The type extent_submit_bio_hook_t keeps all parameters and can be used where the start/done types are not appropriate. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/disk-io.h | 4 ++-- fs/btrfs/extent_io.h | 9 +++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bbbcbf87ac93..ee5d29a0219f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -124,8 +124,8 @@ struct async_submit_bio { void *private_data; struct btrfs_fs_info *fs_info; struct bio *bio; - extent_submit_bio_hook_t *submit_bio_start; - extent_submit_bio_hook_t *submit_bio_done; + extent_submit_bio_start_t *submit_bio_start; + extent_submit_bio_done_t *submit_bio_done; int mirror_num; unsigned long bio_flags; /* @@ -751,8 +751,8 @@ static void run_one_async_free(struct btrfs_work *work) blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, void *private_data, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done) + extent_submit_bio_start_t *submit_bio_start, + extent_submit_bio_done_t *submit_bio_done) { struct async_submit_bio *async; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index aaf99529883d..70a88d61b547 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -131,8 +131,8 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset, void *private_data, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done); + extent_submit_bio_start_t *submit_bio_start, + extent_submit_bio_done_t *submit_bio_done); int btrfs_write_tree_block(struct extent_buffer *buf); void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c82a5842d524..bbfae2abfb39 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -95,6 +95,15 @@ struct io_failure_record; typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset); + +typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset); + +typedef blk_status_t (extent_submit_bio_done_t)(void *private_data, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset); + struct extent_io_ops { /* * The following callbacks must be allways defined, the function -- cgit From d0779291b1e9666aa4aac46ffd8062e3c3b0f2ab Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 8 Mar 2018 13:47:33 +0100 Subject: btrfs: remove unused parameters from extent_submit_bio_start_t Remove parameters not used by any of the callbacks. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 4 +--- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ee5d29a0219f..6fe5a959d56b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -717,7 +717,6 @@ static void run_one_async_start(struct btrfs_work *work) async = container_of(work, struct async_submit_bio, work); ret = async->submit_bio_start(async->private_data, async->bio, - async->mirror_num, async->bio_flags, async->bio_offset); if (ret) async->status = ret; @@ -800,7 +799,6 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) } static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, u64 bio_offset) { /* diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbfae2abfb39..6596b697b827 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -97,8 +97,7 @@ typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio * u64 bio_offset); typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, - struct bio *bio, int mirror_num, unsigned long bio_flags, - u64 bio_offset); + struct bio *bio, u64 bio_offset); typedef blk_status_t (extent_submit_bio_done_t)(void *private_data, struct bio *bio, int mirror_num, unsigned long bio_flags, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc5b7d82b842..08ae94415ed4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1942,7 +1942,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * are inserted into the btree */ static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct inode *inode = private_data; @@ -8222,8 +8221,7 @@ static void btrfs_endio_direct_write(struct bio *bio) } static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data, - struct bio *bio, int mirror_num, - unsigned long bio_flags, u64 offset) + struct bio *bio, u64 offset) { struct inode *inode = private_data; blk_status_t ret; -- cgit From 6c553435870bf351c594437b4ba8babbdb0bb37e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 8 Mar 2018 13:47:33 +0100 Subject: btrfs: remove unused parameters from extent_submit_bio_done_t Remove parameters not used by any of the callbacks. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 ++---- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6fe5a959d56b..7cec8a003838 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,8 +735,7 @@ static void run_one_async_done(struct btrfs_work *work) return; } - async->submit_bio_done(async->private_data, async->bio, async->mirror_num, - async->bio_flags, async->bio_offset); + async->submit_bio_done(async->private_data, async->bio, async->mirror_num); } static void run_one_async_free(struct btrfs_work *work) @@ -809,8 +808,7 @@ static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio } static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) + int mirror_num) { struct inode *inode = private_data; blk_status_t ret; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6596b697b827..b77d84909863 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -100,8 +100,7 @@ typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, struct bio *bio, u64 bio_offset); typedef blk_status_t (extent_submit_bio_done_t)(void *private_data, - struct bio *bio, int mirror_num, unsigned long bio_flags, - u64 bio_offset); + struct bio *bio, int mirror_num); struct extent_io_ops { /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 08ae94415ed4..a03712d496d6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1961,8 +1961,7 @@ static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio * are inserted into the btree */ static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) + int mirror_num) { struct inode *inode = private_data; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); -- cgit From d0ee39349311ce823c36ece7fa066827ab8eb7af Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 8 Mar 2018 14:35:48 +0100 Subject: btrfs: rename submit callbacks and drop double underscores Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/inode.c | 23 +++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7cec8a003838..156116655a32 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -797,7 +797,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) return errno_to_blk_status(ret); } -static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio, +static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio, u64 bio_offset) { /* @@ -807,7 +807,7 @@ static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio return btree_csum_one_bio(bio); } -static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio, +static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio, int mirror_num) { struct inode *inode = private_data; @@ -867,8 +867,8 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, bio_offset, private_data, - __btree_submit_bio_start, - __btree_submit_bio_done); + btree_submit_bio_start, + btree_submit_bio_done); } if (ret) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a03712d496d6..6c08c03fc03c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1941,7 +1941,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio, +static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio, u64 bio_offset) { struct inode *inode = private_data; @@ -1960,7 +1960,7 @@ static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio, +static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, int mirror_num) { struct inode *inode = private_data; @@ -2033,8 +2033,8 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, bio_offset, inode, - __btrfs_submit_bio_start, - __btrfs_submit_bio_done); + btrfs_submit_bio_start, + btrfs_submit_bio_done); goto out; } else if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, 0, 0); @@ -8219,7 +8219,7 @@ static void btrfs_endio_direct_write(struct bio *bio) bio_put(bio); } -static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data, +static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data, struct bio *bio, u64 offset) { struct inode *inode = private_data; @@ -8300,9 +8300,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, return 0; } -static inline blk_status_t -__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, - int async_submit) +static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, + struct inode *inode, u64 file_offset, int async_submit) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; @@ -8325,8 +8324,8 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, if (write && async_submit) { ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0, file_offset, inode, - __btrfs_submit_bio_start_direct_io, - __btrfs_submit_bio_done); + btrfs_submit_bio_start_direct_io, + btrfs_submit_bio_done); goto err; } else if (write) { /* @@ -8412,7 +8411,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) */ atomic_inc(&dip->pending_bios); - status = __btrfs_submit_dio_bio(bio, inode, file_offset, + status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); if (status) { bio_put(bio); @@ -8432,7 +8431,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) } while (submit_len > 0); submit: - status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); + status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); if (!status) return 0; -- cgit From 776c4a7ce86f4c95d9fc6307d44a4e9e1a740e40 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 13 Mar 2018 10:26:06 +0200 Subject: btrfs: Use sizeof directly instead of a constant variable The kernel would like to have all stack VLA usage removed[1]. Unfortunately using an integer constant variable as the size of an array is still considered a VLA. Instead let's use directly sizeof(var) which removes the VLA usage. Use the occasion to remove csum_size altogether and use sizeof() also for the size passed to memcmp [1]: https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 156116655a32..b203982d66bc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -403,8 +403,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, if (csum_type == BTRFS_CSUM_TYPE_CRC32) { u32 crc = ~(u32)0; - const int csum_size = sizeof(crc); - char result[csum_size]; + char result[sizeof(crc)]; /* * The super_block structure does not span the whole @@ -415,7 +414,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); btrfs_csum_final(crc, result); - if (memcmp(raw_disk_sb, result, csum_size)) + if (memcmp(raw_disk_sb, result, sizeof(result))) ret = 1; } -- cgit From 338dae1ae64f413bc058d0f2e964480add5ddacc Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 15 Mar 2018 14:36:26 +0200 Subject: btrfs: remove max_active var from open_ctree Introduced by 5cdc7ad337fb ("btrfs: Replace fs_info->workers with btrfs_workqueue.") but obsoleted by 2a4581983f90 ("btrfs: factor btrfs_init_workqueues() out of open_ctree()"). Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b203982d66bc..04834cb65272 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2389,7 +2389,6 @@ int open_ctree(struct super_block *sb, int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; - u32 max_active; int clear_free_space_tree = 0; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2698,8 +2697,6 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - max_active = fs_info->thread_pool_size; - ret = btrfs_init_workqueues(fs_info, fs_devices); if (ret) { err = ret; -- cgit From 8a5a916d9a35e13576d79cc16e24611821b13e34 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 16 Mar 2018 14:36:27 -0400 Subject: btrfs: fix lockdep splat in btrfs_alloc_subvolume_writers While running btrfs/011, I hit the following lockdep splat. This is the important bit: pcpu_alloc+0x1ac/0x5e0 __percpu_counter_init+0x4e/0xb0 btrfs_init_fs_root+0x99/0x1c0 [btrfs] btrfs_get_fs_root.part.54+0x5b/0x150 [btrfs] resolve_indirect_refs+0x130/0x830 [btrfs] find_parent_nodes+0x69e/0xff0 [btrfs] btrfs_find_all_roots_safe+0xa0/0x110 [btrfs] btrfs_find_all_roots+0x50/0x70 [btrfs] btrfs_qgroup_prepare_account_extents+0x53/0x90 [btrfs] btrfs_commit_transaction+0x3ce/0x9b0 [btrfs] The percpu_counter_init call in btrfs_alloc_subvolume_writers uses GFP_KERNEL, which we can't do during transaction commit. This switches it to GFP_NOFS. ======================================================== WARNING: possible irq lock inversion dependency detected 4.12.14-kvmsmall #8 Tainted: G W -------------------------------------------------------- kswapd0/50 just changed the state of lock: (&delayed_node->mutex){+.+.-.}, at: [] __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] but this lock took another, RECLAIM_FS-unsafe lock in the past: (pcpu_alloc_mutex){+.+.+.} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &delayed_node->mutex --> &found->groups_sem --> pcpu_alloc_mutex Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pcpu_alloc_mutex); local_irq_disable(); lock(&delayed_node->mutex); lock(&found->groups_sem); lock(&delayed_node->mutex); *** DEADLOCK *** 2 locks held by kswapd0/50: #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x7f/0x5b0 #1: (&type->s_umount_key#30){+++++.}, at: [] trylock_super+0x16/0x50 the shortest dependencies between 2nd lock and 1st lock: -> (pcpu_alloc_mutex){+.+.+.} ops: 4904 { HARDIRQ-ON-W at: __mutex_lock+0x4e/0x8c0 pcpu_alloc+0x1ac/0x5e0 alloc_kmem_cache_cpus.isra.70+0x25/0xa0 __do_tune_cpucache+0x2c/0x220 do_tune_cpucache+0x26/0xc0 enable_cpucache+0x6d/0xf0 kmem_cache_init_late+0x42/0x75 start_kernel+0x343/0x4cb x86_64_start_kernel+0x127/0x134 secondary_startup_64+0xa5/0xb0 SOFTIRQ-ON-W at: __mutex_lock+0x4e/0x8c0 pcpu_alloc+0x1ac/0x5e0 alloc_kmem_cache_cpus.isra.70+0x25/0xa0 __do_tune_cpucache+0x2c/0x220 do_tune_cpucache+0x26/0xc0 enable_cpucache+0x6d/0xf0 kmem_cache_init_late+0x42/0x75 start_kernel+0x343/0x4cb x86_64_start_kernel+0x127/0x134 secondary_startup_64+0xa5/0xb0 RECLAIM_FS-ON-W at: __kmalloc+0x47/0x310 pcpu_extend_area_map+0x2b/0xc0 pcpu_alloc+0x3ec/0x5e0 alloc_kmem_cache_cpus.isra.70+0x25/0xa0 __do_tune_cpucache+0x2c/0x220 do_tune_cpucache+0x26/0xc0 enable_cpucache+0x6d/0xf0 __kmem_cache_create+0x1bf/0x390 create_cache+0xba/0x1b0 kmem_cache_create+0x1f8/0x2b0 ksm_init+0x6f/0x19d do_one_initcall+0x50/0x1b0 kernel_init_freeable+0x201/0x289 kernel_init+0xa/0x100 ret_from_fork+0x3a/0x50 INITIAL USE at: __mutex_lock+0x4e/0x8c0 pcpu_alloc+0x1ac/0x5e0 alloc_kmem_cache_cpus.isra.70+0x25/0xa0 setup_cpu_cache+0x2f/0x1f0 __kmem_cache_create+0x1bf/0x390 create_boot_cache+0x8b/0xb1 kmem_cache_init+0xa1/0x19e start_kernel+0x270/0x4cb x86_64_start_kernel+0x127/0x134 secondary_startup_64+0xa5/0xb0 } ... key at: [] pcpu_alloc_mutex+0x70/0xa0 ... acquired at: pcpu_alloc+0x1ac/0x5e0 __percpu_counter_init+0x4e/0xb0 btrfs_init_fs_root+0x99/0x1c0 [btrfs] btrfs_get_fs_root.part.54+0x5b/0x150 [btrfs] resolve_indirect_refs+0x130/0x830 [btrfs] find_parent_nodes+0x69e/0xff0 [btrfs] btrfs_find_all_roots_safe+0xa0/0x110 [btrfs] btrfs_find_all_roots+0x50/0x70 [btrfs] btrfs_qgroup_prepare_account_extents+0x53/0x90 [btrfs] btrfs_commit_transaction+0x3ce/0x9b0 [btrfs] transaction_kthread+0x176/0x1b0 [btrfs] kthread+0x102/0x140 ret_from_fork+0x3a/0x50 -> (&fs_info->commit_root_sem){++++..} ops: 1566382 { HARDIRQ-ON-W at: down_write+0x3e/0xa0 cache_block_group+0x287/0x420 [btrfs] find_free_extent+0x106c/0x12d0 [btrfs] btrfs_reserve_extent+0xd8/0x170 [btrfs] cow_file_range.isra.66+0x133/0x470 [btrfs] run_delalloc_range+0x121/0x410 [btrfs] writepage_delalloc.isra.50+0xfe/0x180 [btrfs] __extent_writepage+0x19a/0x360 [btrfs] extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs] extent_writepages+0x4d/0x60 [btrfs] do_writepages+0x1a/0x70 __filemap_fdatawrite_range+0xa7/0xe0 btrfs_rename+0x5ee/0xdb0 [btrfs] vfs_rename+0x52a/0x7e0 SyS_rename+0x351/0x3b0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 HARDIRQ-ON-R at: down_read+0x35/0x90 caching_thread+0x57/0x560 [btrfs] normal_work_helper+0x1c0/0x5e0 [btrfs] process_one_work+0x1e0/0x5c0 worker_thread+0x44/0x390 kthread+0x102/0x140 ret_from_fork+0x3a/0x50 SOFTIRQ-ON-W at: down_write+0x3e/0xa0 cache_block_group+0x287/0x420 [btrfs] find_free_extent+0x106c/0x12d0 [btrfs] btrfs_reserve_extent+0xd8/0x170 [btrfs] cow_file_range.isra.66+0x133/0x470 [btrfs] run_delalloc_range+0x121/0x410 [btrfs] writepage_delalloc.isra.50+0xfe/0x180 [btrfs] __extent_writepage+0x19a/0x360 [btrfs] extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs] extent_writepages+0x4d/0x60 [btrfs] do_writepages+0x1a/0x70 __filemap_fdatawrite_range+0xa7/0xe0 btrfs_rename+0x5ee/0xdb0 [btrfs] vfs_rename+0x52a/0x7e0 SyS_rename+0x351/0x3b0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 SOFTIRQ-ON-R at: down_read+0x35/0x90 caching_thread+0x57/0x560 [btrfs] normal_work_helper+0x1c0/0x5e0 [btrfs] process_one_work+0x1e0/0x5c0 worker_thread+0x44/0x390 kthread+0x102/0x140 ret_from_fork+0x3a/0x50 INITIAL USE at: down_write+0x3e/0xa0 cache_block_group+0x287/0x420 [btrfs] find_free_extent+0x106c/0x12d0 [btrfs] btrfs_reserve_extent+0xd8/0x170 [btrfs] cow_file_range.isra.66+0x133/0x470 [btrfs] run_delalloc_range+0x121/0x410 [btrfs] writepage_delalloc.isra.50+0xfe/0x180 [btrfs] __extent_writepage+0x19a/0x360 [btrfs] extent_write_cache_pages.constprop.56+0x249/0x3e0 [btrfs] extent_writepages+0x4d/0x60 [btrfs] do_writepages+0x1a/0x70 __filemap_fdatawrite_range+0xa7/0xe0 btrfs_rename+0x5ee/0xdb0 [btrfs] vfs_rename+0x52a/0x7e0 SyS_rename+0x351/0x3b0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 } ... key at: [] __key.61970+0x0/0xfffffffffff9aa88 [btrfs] ... acquired at: cache_block_group+0x287/0x420 [btrfs] find_free_extent+0x106c/0x12d0 [btrfs] btrfs_reserve_extent+0xd8/0x170 [btrfs] btrfs_alloc_tree_block+0x12f/0x4c0 [btrfs] btrfs_create_tree+0xbb/0x2a0 [btrfs] btrfs_create_uuid_tree+0x37/0x140 [btrfs] open_ctree+0x23c0/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 -> (&found->groups_sem){++++..} ops: 2134587 { HARDIRQ-ON-W at: down_write+0x3e/0xa0 __link_block_group+0x34/0x130 [btrfs] btrfs_read_block_groups+0x33d/0x7b0 [btrfs] open_ctree+0x2054/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 HARDIRQ-ON-R at: down_read+0x35/0x90 btrfs_calc_num_tolerated_disk_barrier_failures+0x113/0x1f0 [btrfs] open_ctree+0x207b/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 SOFTIRQ-ON-W at: down_write+0x3e/0xa0 __link_block_group+0x34/0x130 [btrfs] btrfs_read_block_groups+0x33d/0x7b0 [btrfs] open_ctree+0x2054/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 SOFTIRQ-ON-R at: down_read+0x35/0x90 btrfs_calc_num_tolerated_disk_barrier_failures+0x113/0x1f0 [btrfs] open_ctree+0x207b/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 INITIAL USE at: down_write+0x3e/0xa0 __link_block_group+0x34/0x130 [btrfs] btrfs_read_block_groups+0x33d/0x7b0 [btrfs] open_ctree+0x2054/0x2660 [btrfs] btrfs_mount+0xd36/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 btrfs_mount+0x18c/0xf90 [btrfs] mount_fs+0x3a/0x160 vfs_kern_mount+0x66/0x150 do_mount+0x1c1/0xcc0 SyS_mount+0x7e/0xd0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 } ... key at: [] __key.59101+0x0/0xfffffffffff9ab78 [btrfs] ... acquired at: find_free_extent+0xcb4/0x12d0 [btrfs] btrfs_reserve_extent+0xd8/0x170 [btrfs] btrfs_alloc_tree_block+0x12f/0x4c0 [btrfs] __btrfs_cow_block+0x110/0x5b0 [btrfs] btrfs_cow_block+0xd7/0x290 [btrfs] btrfs_search_slot+0x1f6/0x960 [btrfs] btrfs_lookup_inode+0x2a/0x90 [btrfs] __btrfs_update_delayed_inode+0x65/0x210 [btrfs] btrfs_commit_inode_delayed_inode+0x121/0x130 [btrfs] btrfs_evict_inode+0x3fe/0x6a0 [btrfs] evict+0xc4/0x190 __dentry_kill+0xbf/0x170 dput+0x2ae/0x2f0 SyS_rename+0x2a6/0x3b0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 -> (&delayed_node->mutex){+.+.-.} ops: 5580204 { HARDIRQ-ON-W at: __mutex_lock+0x4e/0x8c0 btrfs_delayed_update_inode+0x46/0x6e0 [btrfs] btrfs_update_inode+0x83/0x110 [btrfs] btrfs_dirty_inode+0x62/0xe0 [btrfs] touch_atime+0x8c/0xb0 do_generic_file_read+0x818/0xb10 __vfs_read+0xdc/0x150 vfs_read+0x8a/0x130 SyS_read+0x45/0xa0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 SOFTIRQ-ON-W at: __mutex_lock+0x4e/0x8c0 btrfs_delayed_update_inode+0x46/0x6e0 [btrfs] btrfs_update_inode+0x83/0x110 [btrfs] btrfs_dirty_inode+0x62/0xe0 [btrfs] touch_atime+0x8c/0xb0 do_generic_file_read+0x818/0xb10 __vfs_read+0xdc/0x150 vfs_read+0x8a/0x130 SyS_read+0x45/0xa0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 IN-RECLAIM_FS-W at: __mutex_lock+0x4e/0x8c0 __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] btrfs_evict_inode+0x22c/0x6a0 [btrfs] evict+0xc4/0x190 dispose_list+0x35/0x50 prune_icache_sb+0x42/0x50 super_cache_scan+0x139/0x190 shrink_slab+0x262/0x5b0 shrink_node+0x2eb/0x2f0 kswapd+0x2eb/0x890 kthread+0x102/0x140 ret_from_fork+0x3a/0x50 INITIAL USE at: __mutex_lock+0x4e/0x8c0 btrfs_delayed_update_inode+0x46/0x6e0 [btrfs] btrfs_update_inode+0x83/0x110 [btrfs] btrfs_dirty_inode+0x62/0xe0 [btrfs] touch_atime+0x8c/0xb0 do_generic_file_read+0x818/0xb10 __vfs_read+0xdc/0x150 vfs_read+0x8a/0x130 SyS_read+0x45/0xa0 do_syscall_64+0x79/0x1e0 entry_SYSCALL_64_after_hwframe+0x42/0xb7 } ... key at: [] __key.56935+0x0/0xfffffffffff96b78 [btrfs] ... acquired at: __lock_acquire+0x264/0x11c0 lock_acquire+0xbd/0x1e0 __mutex_lock+0x4e/0x8c0 __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] btrfs_evict_inode+0x22c/0x6a0 [btrfs] evict+0xc4/0x190 dispose_list+0x35/0x50 prune_icache_sb+0x42/0x50 super_cache_scan+0x139/0x190 shrink_slab+0x262/0x5b0 shrink_node+0x2eb/0x2f0 kswapd+0x2eb/0x890 kthread+0x102/0x140 ret_from_fork+0x3a/0x50 stack backtrace: CPU: 1 PID: 50 Comm: kswapd0 Tainted: G W 4.12.14-kvmsmall #8 SLE15 (unreleased) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x78/0xb7 print_irq_inversion_bug.part.38+0x19f/0x1aa check_usage_forwards+0x102/0x120 ? ret_from_fork+0x3a/0x50 ? check_usage_backwards+0x110/0x110 mark_lock+0x16c/0x270 __lock_acquire+0x264/0x11c0 ? pagevec_lookup_entries+0x1a/0x30 ? truncate_inode_pages_range+0x2b3/0x7f0 lock_acquire+0xbd/0x1e0 ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] __mutex_lock+0x4e/0x8c0 ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] ? __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] ? btrfs_evict_inode+0x1f6/0x6a0 [btrfs] __btrfs_release_delayed_node+0x3a/0x1f0 [btrfs] btrfs_evict_inode+0x22c/0x6a0 [btrfs] evict+0xc4/0x190 dispose_list+0x35/0x50 prune_icache_sb+0x42/0x50 super_cache_scan+0x139/0x190 shrink_slab+0x262/0x5b0 shrink_node+0x2eb/0x2f0 kswapd+0x2eb/0x890 kthread+0x102/0x140 ? mem_cgroup_shrink_node+0x2c0/0x2c0 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x3a/0x50 Signed-off-by: Jeff Mahoney Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 04834cb65272..1657d6aa4fa6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1095,7 +1095,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); + ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS); if (ret < 0) { kfree(writers); return ERR_PTR(ret); -- cgit From 75cb379d2635215ad2c67750693f7dc45ad19a5f Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Mar 2018 15:25:26 -0400 Subject: btrfs: defer adding raid type kobject until after chunk relocation Any time the first block group of a new type is created, we add a new kobject to sysfs to hold the attributes for that type. Kobject-internal allocations always use GFP_KERNEL, making them prone to fs-reclaim races. While it appears as if this can occur any time a block group is created, the only times the first block group of a new type can be created in memory is at mount and when we create the first new block group during raid conversion. This patch adds a new list to track pending kobject additions and then handles them after we do chunk relocation. Between relocating the target chunk (or forcing allocation of a new chunk in the case of data) and removing the old chunk, we're in a safe place for fs-reclaim to occur. We're holding the volume mutex, which is already held across page faults, and the delete_unused_bgs_mutex, which will only stall the cleaner thread. Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 ++++- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 60 +++++++++++++++++++++++++++++++++++--------------- fs/btrfs/sysfs.c | 2 +- fs/btrfs/volumes.c | 12 ++++++++++ 5 files changed, 62 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ffa72ca5755d..8d3aa56b928b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -385,8 +385,9 @@ struct btrfs_dev_replace { /* For raid type sysfs entries */ struct raid_kobject { - int raid_type; + u64 flags; struct kobject kobj; + struct list_head list; }; struct btrfs_space_info { @@ -940,6 +941,8 @@ struct btrfs_fs_info { u32 thread_pool_size; struct kobject *space_info_kobj; + struct list_head pending_raid_kobjs; + spinlock_t pending_raid_kobjs_lock; /* uncontended */ u64 total_pinned; @@ -2700,6 +2703,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, u64 chunk_offset, u64 size); +void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1657d6aa4fa6..38b387ae78f8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2431,6 +2431,8 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); + INIT_LIST_HEAD(&fs_info->pending_raid_kobjs); + spin_lock_init(&fs_info->pending_raid_kobjs_lock); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e10f74dac493..0b1f01dd02de 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9918,9 +9918,39 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +/* link_block_group will queue up kobjects to add when we're reclaim-safe */ +void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + struct raid_kobject *rkobj; + LIST_HEAD(list); + int index; + int ret = 0; + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_splice_init(&fs_info->pending_raid_kobjs, &list); + spin_unlock(&fs_info->pending_raid_kobjs_lock); + + list_for_each_entry(rkobj, &list, list) { + space_info = __find_space_info(fs_info, rkobj->flags); + index = btrfs_bg_flags_to_raid_index(rkobj->flags); + + ret = kobject_add(&rkobj->kobj, &space_info->kobj, + "%s", get_raid_name(index)); + if (ret) { + kobject_put(&rkobj->kobj); + break; + } + } + if (ret) + btrfs_warn(fs_info, + "failed to add kobject for block cache, ignoring"); +} + static void link_block_group(struct btrfs_block_group_cache *cache) { struct btrfs_space_info *space_info = cache->space_info; + struct btrfs_fs_info *fs_info = cache->fs_info; int index = btrfs_bg_flags_to_raid_index(cache->flags); bool first = false; @@ -9931,27 +9961,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache) up_write(&space_info->groups_sem); if (first) { - struct raid_kobject *rkobj; - int ret; - - rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); - if (!rkobj) - goto out_err; - rkobj->raid_type = index; - kobject_init(&rkobj->kobj, &btrfs_raid_ktype); - ret = kobject_add(&rkobj->kobj, &space_info->kobj, - "%s", get_raid_name(index)); - if (ret) { - kobject_put(&rkobj->kobj); - goto out_err; + struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); + if (!rkobj) { + btrfs_warn(cache->fs_info, + "couldn't alloc memory for raid level kobject"); + return; } + rkobj->flags = cache->flags; + kobject_init(&rkobj->kobj, &btrfs_raid_ktype); + + spin_lock(&fs_info->pending_raid_kobjs_lock); + list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); + spin_unlock(&fs_info->pending_raid_kobjs_lock); space_info->block_group_kobjs[index] = &rkobj->kobj; } - - return; -out_err: - btrfs_warn(cache->fs_info, - "failed to add kobject for block cache, ignoring"); } static struct btrfs_block_group_cache * @@ -10167,6 +10190,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) inc_block_group_ro(cache, 1); } + btrfs_add_raid_kobjects(info); init_global_block_rsv(info); ret = 0; error: diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 6af7b58e1a90..ca067471cd46 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, { struct btrfs_space_info *sinfo = to_space_info(kobj->parent); struct btrfs_block_group_cache *block_group; - int index = to_raid_kobj(kobj)->raid_type; + int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags); u64 val = 0; down_read(&sinfo->groups_sem); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 73de042158f1..4fc6acf65220 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3003,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) if (ret) return ret; + /* + * We add the kobjects here (and after forcing data chunk creation) + * since relocation is the only place we'll create chunks of a new + * type at runtime. The only place where we'll remove the last + * chunk of a type is the call immediately below this one. Even + * so, we're protected against races with the cleaner thread since + * we're covered by the delete_unused_bgs_mutex. + */ + btrfs_add_raid_kobjects(fs_info); + trans = btrfs_start_trans_remove_block_group(root->fs_info, chunk_offset); if (IS_ERR(trans)) { @@ -3130,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, if (ret < 0) return ret; + btrfs_add_raid_kobjects(fs_info); + return 1; } } -- cgit From e1211d0e896b71d395fe411d0e0a76f4bc336617 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Dec 2017 15:34:30 +0800 Subject: btrfs: qgroup: Don't use root->qgroup_meta_rsv for qgroup Since qgroup has seperate metadata reservation types now, we can completely get rid of the old root->qgroup_meta_rsv, which mostly acts as current META_PERTRANS reservation type. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 --- fs/btrfs/disk-io.c | 1 - fs/btrfs/qgroup.c | 33 ++++++++++++++++++++++++--------- 3 files changed, 24 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9f512be9d41..df0463e2ab7f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1264,9 +1264,6 @@ struct btrfs_root { int send_in_progress; struct btrfs_subvolume_writers *subv_writers; atomic_t will_be_snapshotted; - - /* For qgroup metadata space reserve */ - atomic64_t qgroup_meta_rsv; }; struct btrfs_file_private { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38b387ae78f8..ad900f821f1e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1163,7 +1163,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->orphan_inodes, 0); refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshotted, 0); - atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8831eaa14204..a7ca464cdbf7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2499,6 +2499,15 @@ out: return ret; } +/* + * Free @num_bytes of reserved space with @type for qgroup. (Normally level 0 + * qgroup). + * + * Will handle all higher level qgroup too. + * + * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup. + * This special case is only used for META_PERTRANS type. + */ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes, enum btrfs_qgroup_rsv_type type) @@ -2515,6 +2524,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, if (num_bytes == 0) return; + if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) { + WARN(1, "%s: Invalid type to free", __func__); + return; + } spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; @@ -2525,6 +2538,13 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, if (!qgroup) goto out; + /* + * We're freeing all pertrans rsv, get current value from level 0 + * qgroup as real num_bytes to free. + */ + if (num_bytes == (u64)-1) + num_bytes = qgroup->rsv.values[type]; + ulist_reinit(fs_info->qgroup_ulist); ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); @@ -3082,24 +3102,21 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ret = qgroup_reserve(root, num_bytes, enforce, type); if (ret < 0) return ret; - atomic64_add(num_bytes, &root->qgroup_meta_rsv); return ret; } void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; - u64 reserved; if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || !is_fstree(root->objectid)) return; - reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); - if (reserved == 0) - return; - trace_qgroup_meta_reserve(root, -(s64)reserved); - btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved, + /* TODO: Update trace point to handle such free */ + trace_qgroup_meta_reserve(root, 0); + /* Special value -1 means to free all reserved space */ + btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1, BTRFS_QGROUP_RSV_META_PERTRANS); } @@ -3113,8 +3130,6 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, return; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); - atomic64_sub(num_bytes, &root->qgroup_meta_rsv); trace_qgroup_meta_reserve(root, -(s64)num_bytes); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type); } -- cgit From 8287475a20552af66b32c07704dbdbeeb898ac1f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Dec 2017 15:34:34 +0800 Subject: btrfs: qgroup: Use root::qgroup_meta_rsv_* to record qgroup meta reserved space For quota disabled->enable case, it's possible that at reservation time quota was not enabled so no bytes were really reserved, while at release time, quota was enabled so we will try to release some bytes we didn't really own. Such situation can cause metadata reserveation underflow, for both types, also less possible for per-trans type since quota enable will commit transaction. To address this, record qgroup meta reserved bytes into root::qgroup_meta_rsv_pertrans and ::prealloc. So at releasing time we won't free any bytes we didn't reserve. For DATA, it's already handled by io_tree, so nothing needs to be done there. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 68 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7924e50cc528..0eb55825862a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1264,6 +1264,11 @@ struct btrfs_root { int send_in_progress; struct btrfs_subvolume_writers *subv_writers; atomic_t will_be_snapshotted; + + /* For qgroup metadata reserved space */ + spinlock_t qgroup_meta_rsv_lock; + u64 qgroup_meta_rsv_pertrans; + u64 qgroup_meta_rsv_prealloc; }; struct btrfs_file_private { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ad900f821f1e..269374261e36 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1147,6 +1147,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, spin_lock_init(&root->accounting_lock); spin_lock_init(&root->log_extents_lock[0]); spin_lock_init(&root->log_extents_lock[1]); + spin_lock_init(&root->qgroup_meta_rsv_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9ce626579c81..836819c34c95 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2538,11 +2538,11 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, if (!qgroup) goto out; - /* - * We're freeing all pertrans rsv, get current value from level 0 - * qgroup as real num_bytes to free. - */ if (num_bytes == (u64)-1) + /* + * We're freeing all pertrans rsv, get reserved value from + * level 0 qgroup as real num_bytes to free. + */ num_bytes = qgroup->rsv.values[type]; ulist_reinit(fs_info->qgroup_ulist); @@ -3087,6 +3087,46 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); } +static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type) +{ + if (type != BTRFS_QGROUP_RSV_META_PREALLOC && + type != BTRFS_QGROUP_RSV_META_PERTRANS) + return; + if (num_bytes == 0) + return; + + spin_lock(&root->qgroup_meta_rsv_lock); + if (type == BTRFS_QGROUP_RSV_META_PREALLOC) + root->qgroup_meta_rsv_prealloc += num_bytes; + else + root->qgroup_meta_rsv_pertrans += num_bytes; + spin_unlock(&root->qgroup_meta_rsv_lock); +} + +static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type) +{ + if (type != BTRFS_QGROUP_RSV_META_PREALLOC && + type != BTRFS_QGROUP_RSV_META_PERTRANS) + return 0; + if (num_bytes == 0) + return 0; + + spin_lock(&root->qgroup_meta_rsv_lock); + if (type == BTRFS_QGROUP_RSV_META_PREALLOC) { + num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc, + num_bytes); + root->qgroup_meta_rsv_prealloc -= num_bytes; + } else { + num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans, + num_bytes); + root->qgroup_meta_rsv_pertrans -= num_bytes; + } + spin_unlock(&root->qgroup_meta_rsv_lock); + return num_bytes; +} + int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce) { @@ -3102,6 +3142,15 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ret = qgroup_reserve(root, num_bytes, enforce, type); if (ret < 0) return ret; + /* + * Record what we have reserved into root. + * + * To avoid quota disabled->enabled underflow. + * In that case, we may try to free space we haven't reserved + * (since quota was disabled), so record what we reserved into root. + * And ensure later release won't underflow this number. + */ + add_root_meta_rsv(root, num_bytes, type); return ret; } @@ -3129,6 +3178,12 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, !is_fstree(root->objectid)) return; + /* + * reservation for META_PREALLOC can happen before quota is enabled, + * which can lead to underflow. + * Here ensure we will only free what we really have reserved. + */ + num_bytes = sub_root_meta_rsv(root, num_bytes, type); BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); trace_qgroup_meta_reserve(root, -(s64)num_bytes); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type); @@ -3187,6 +3242,9 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || !is_fstree(root->objectid)) return; + /* Same as btrfs_qgroup_free_meta_prealloc() */ + num_bytes = sub_root_meta_rsv(root, num_bytes, + BTRFS_QGROUP_RSV_META_PREALLOC); qgroup_convert_meta(fs_info, root->objectid, num_bytes); } -- cgit From 581c1760415c48cca9349b198bba52dd38750765 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 29 Mar 2018 09:08:11 +0800 Subject: btrfs: Validate child tree block's level and first key We have several reports about node pointer points to incorrect child tree blocks, which could have even wrong owner and level but still with valid generation and checksum. Although btrfs check could handle it and print error message like: leaf parent key incorrect 60670574592 Kernel doesn't have enough check on this type of corruption correctly. At least add such check to read_tree_block() and btrfs_read_buffer(), where we need two new parameters @level and @first_key to verify the child tree block. The new @level check is mandatory and all call sites are already modified to extract expected level from its call chain. While @first_key is optional, the following call sites are skipping such check: 1) Root node/leaf As ROOT_ITEM doesn't contain the first key, skip @first_key check. 2) Direct backref Only parent bytenr and level is known and we need to resolve the key all by ourselves, skip @first_key check. Another note of this verification is, it needs extra info from nodeptr or ROOT_ITEM, so it can't fit into current tree-checker framework, which is limited to node/leaf boundary. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 ++-- fs/btrfs/ctree.c | 28 +++++++++++---- fs/btrfs/disk-io.c | 95 +++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/disk-io.h | 8 +++-- fs/btrfs/extent-tree.c | 6 +++- fs/btrfs/print-tree.c | 10 ++++-- fs/btrfs/qgroup.c | 7 ++-- fs/btrfs/ref-verify.c | 7 +++- fs/btrfs/relocation.c | 21 ++++++++--- fs/btrfs/tree-log.c | 28 +++++++++------ 10 files changed, 170 insertions(+), 46 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6007dd6b799e..571024bc632e 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info, BUG_ON(ref->key_for_search.type); BUG_ON(!ref->wanted_disk_byte); - eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0); + eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0, + ref->level - 1, NULL); if (IS_ERR(eb)) { free_pref(ref); return PTR_ERR(eb); @@ -1288,7 +1289,8 @@ again: ref->level == 0) { struct extent_buffer *eb; - eb = read_tree_block(fs_info, ref->parent, 0); + eb = read_tree_block(fs_info, ref->parent, 0, + ref->level, NULL); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1ef6b67f893a..7c8faeb868f4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1354,6 +1354,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_root *old_root = NULL; u64 old_generation = 0; u64 logical; + int level; eb_root = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(eb_root, time_seq); @@ -1364,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq) old_root = &tm->old_root; old_generation = tm->generation; logical = old_root->logical; + level = old_root->level; } else { logical = eb_root->start; + level = btrfs_header_level(eb_root); } tm = tree_mod_log_search(fs_info, logical, time_seq); if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - old = read_tree_block(fs_info, logical, 0); + old = read_tree_block(fs_info, logical, 0, level, NULL); if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { if (!IS_ERR(old)) free_extent_buffer(old); @@ -1592,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking(parent); for (i = start_slot; i <= end_slot; i++) { + struct btrfs_key first_key; int close = 1; btrfs_node_key(parent, &disk_key, i); @@ -1601,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); gen = btrfs_node_ptr_generation(parent, i); + btrfs_node_key_to_cpu(parent, &first_key, i); if (last_block == 0) last_block = blocknr; @@ -1624,7 +1629,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, uptodate = 0; if (!cur || !uptodate) { if (!cur) { - cur = read_tree_block(fs_info, blocknr, gen); + cur = read_tree_block(fs_info, blocknr, gen, + parent_level - 1, + &first_key); if (IS_ERR(cur)) { return PTR_ERR(cur); } else if (!extent_buffer_uptodate(cur)) { @@ -1632,7 +1639,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, return -EIO; } } else if (!uptodate) { - err = btrfs_read_buffer(cur, gen); + err = btrfs_read_buffer(cur, gen, + parent_level - 1,&first_key); if (err) { free_extent_buffer(cur); return err; @@ -1785,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, { int level = btrfs_header_level(parent); struct extent_buffer *eb; + struct btrfs_key first_key; if (slot < 0 || slot >= btrfs_header_nritems(parent)) return ERR_PTR(-ENOENT); BUG_ON(level == 0); + btrfs_node_key_to_cpu(parent, &first_key, slot); eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), - btrfs_node_ptr_generation(parent, slot)); + btrfs_node_ptr_generation(parent, slot), + level - 1, &first_key); if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); eb = ERR_PTR(-EIO); @@ -2388,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, u64 gen; struct extent_buffer *b = *eb_ret; struct extent_buffer *tmp; + struct btrfs_key first_key; int ret; + int parent_level; blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); + parent_level = btrfs_header_level(b); + btrfs_node_key_to_cpu(b, &first_key, slot); tmp = find_extent_buffer(fs_info, blocknr); if (tmp) { @@ -2410,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_set_path_blocking(p); /* now we're allowed to do a blocking uptodate check */ - ret = btrfs_read_buffer(tmp, gen); + ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key); if (!ret) { *eb_ret = tmp; return 0; @@ -2437,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_release_path(p); ret = -EAGAIN; - tmp = read_tree_block(fs_info, blocknr, 0); + tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1, + &first_key); if (!IS_ERR(tmp)) { /* * If the read above didn't mark this buffer up to date, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 269374261e36..a2f3a0c67a99 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } +static int verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key) +{ + int found_level; + struct btrfs_key found_key; + int ret; + + found_level = btrfs_header_level(eb); + if (found_level != level) { +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(1); + btrfs_err(fs_info, +"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", + eb->start, level, found_level); +#endif + return -EIO; + } + + if (!first_key) + return 0; + + if (found_level) + btrfs_node_key_to_cpu(eb, &found_key, 0); + else + btrfs_item_key_to_cpu(eb, &found_key, 0); + ret = btrfs_comp_cpu_keys(first_key, &found_key); + +#ifdef CONFIG_BTRFS_DEBUG + if (ret) { + WARN_ON(1); + btrfs_err(fs_info, +"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)", + eb->start, first_key->objectid, first_key->type, + first_key->offset, found_key.objectid, + found_key.type, found_key.offset); + } +#endif + return ret; +} + /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. + * + * @parent_transid: expected transid, skip check if 0 + * @level: expected level, mandatory check + * @first_key: expected key of first slot, skip check if NULL */ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 parent_transid) + u64 parent_transid, int level, + struct btrfs_key *first_key) { struct extent_io_tree *io_tree; int failed = 0; @@ -448,11 +494,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, mirror_num); if (!ret) { - if (!verify_parent_transid(io_tree, eb, + if (verify_parent_transid(io_tree, eb, parent_transid, 0)) - break; - else ret = -EIO; + else if (verify_level_key(fs_info, eb, level, + first_key)) + ret = -EUCLEAN; + else + break; } /* @@ -460,7 +509,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, * there is no reason to read the other copies, they won't be * any less wrong. */ - if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) + if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) || + ret == -EUCLEAN) break; num_copies = btrfs_num_copies(fs_info, @@ -1049,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf) buf->start, buf->start + buf->len - 1); } +/* + * Read tree block at logical address @bytenr and do variant basic but critical + * verification. + * + * @parent_transid: expected transid of this tree block, skip check if 0 + * @level: expected level, mandatory check + * @first_key: expected key in slot 0, skip check if NULL + */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 parent_transid) + u64 parent_transid, int level, + struct btrfs_key *first_key) { struct extent_buffer *buf = NULL; int ret; @@ -1059,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, if (IS_ERR(buf)) return buf; - ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid); + ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + level, first_key); if (ret) { free_extent_buffer(buf); return ERR_PTR(ret); @@ -1388,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_path *path; u64 generation; int ret; + int level; path = btrfs_alloc_path(); if (!path) @@ -1410,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); + level = btrfs_root_level(&root->root_item); root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), - generation); + generation, level, NULL); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); goto find_fail; @@ -2261,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); + int level = btrfs_super_log_root_level(disk_super); if (fs_devices->rw_devices == 0) { btrfs_warn(fs_info, "log replay required on RO media"); @@ -2274,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, __setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(fs_info, bytenr, - fs_info->generation + 1); + fs_info->generation + 1, + level, NULL); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); @@ -2390,6 +2454,7 @@ int open_ctree(struct super_block *sb, int num_backups_tried = 0; int backup_index = 0; int clear_free_space_tree = 0; + int level; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); @@ -2725,12 +2790,13 @@ int open_ctree(struct super_block *sb, } generation = btrfs_super_chunk_root_generation(disk_super); + level = btrfs_super_chunk_root_level(disk_super); __setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(fs_info, btrfs_super_chunk_root(disk_super), - generation); + generation, level, NULL); if (IS_ERR(chunk_root->node) || !extent_buffer_uptodate(chunk_root->node)) { btrfs_err(fs_info, "failed to read chunk root"); @@ -2764,10 +2830,11 @@ int open_ctree(struct super_block *sb, retry_root_backup: generation = btrfs_super_generation(disk_super); + level = btrfs_super_root_level(disk_super); tree_root->node = read_tree_block(fs_info, btrfs_super_root(disk_super), - generation); + generation, level, NULL); if (IS_ERR(tree_root->node) || !extent_buffer_uptodate(tree_root->node)) { btrfs_warn(fs_info, "failed to read tree root"); @@ -3887,12 +3954,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) __btrfs_btree_balance_dirty(fs_info, 0); } -int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, + struct btrfs_key *first_key) { struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; struct btrfs_fs_info *fs_info = root->fs_info; - return btree_read_extent_buffer_pages(fs_info, buf, parent_transid); + return btree_read_extent_buffer_pages(fs_info, buf, parent_transid, + level, first_key); } static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 70a88d61b547..453ea9f5d4e9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror) struct btrfs_device; struct btrfs_fs_devices; -struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 parent_transid); +struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, + u64 parent_transid, int level, + struct btrfs_key *first_key); void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr); int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, int mirror_num, struct extent_buffer **eb); @@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root) void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); -int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, + struct btrfs_key *first_key); u32 btrfs_csum_data(const char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, u8 *result); blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6b07202385d3..72f6c03445b6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8710,6 +8710,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 parent; u32 blocksize; struct btrfs_key key; + struct btrfs_key first_key; struct extent_buffer *next; int level = wc->level; int reada = 0; @@ -8730,6 +8731,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); + btrfs_node_key_to_cpu(path->nodes[level], &first_key, + path->slots[level]); blocksize = fs_info->nodesize; next = find_extent_buffer(fs_info, bytenr); @@ -8794,7 +8797,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (!next) { if (reada && level == 1) reada_walk_down(trans, root, wc, path); - next = read_tree_block(fs_info, bytenr, generation); + next = read_tree_block(fs_info, bytenr, generation, level - 1, + &first_key); if (IS_ERR(next)) { return PTR_ERR(next); } else if (!extent_buffer_uptodate(next)) { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 569205e651c7..4a8770485f77 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -365,9 +365,13 @@ void btrfs_print_tree(struct extent_buffer *c) btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct extent_buffer *next = read_tree_block(fs_info, - btrfs_node_blockptr(c, i), - btrfs_node_ptr_generation(c, i)); + struct btrfs_key first_key; + struct extent_buffer *next; + + btrfs_node_key_to_cpu(c, &first_key, i); + next = read_tree_block(fs_info, btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i), + level - 1, &first_key); if (IS_ERR(next)) { continue; } else if (!extent_buffer_uptodate(next)) { diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 6b715d6d3c94..875df02ffaee 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1684,7 +1684,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, return 0; if (!extent_buffer_uptodate(root_eb)) { - ret = btrfs_read_buffer(root_eb, root_gen); + ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL); if (ret) goto out; } @@ -1715,6 +1715,7 @@ walk_down: level = root_level; while (level >= 0) { if (path->nodes[level] == NULL) { + struct btrfs_key first_key; int parent_slot; u64 child_gen; u64 child_bytenr; @@ -1727,8 +1728,10 @@ walk_down: parent_slot = path->slots[level + 1]; child_bytenr = btrfs_node_blockptr(eb, parent_slot); child_gen = btrfs_node_ptr_generation(eb, parent_slot); + btrfs_node_key_to_cpu(eb, &first_key, parent_slot); - eb = read_tree_block(fs_info, child_bytenr, child_gen); + eb = read_tree_block(fs_info, child_bytenr, child_gen, + level, &first_key); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 171f3cce30e6..35fab67dcbe8 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -579,11 +579,16 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, while (level >= 0) { if (level) { + struct btrfs_key first_key; + block_bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); gen = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]); - eb = read_tree_block(fs_info, block_bytenr, gen); + btrfs_node_key_to_cpu(path->nodes[level], &first_key, + path->slots[level]); + eb = read_tree_block(fs_info, block_bytenr, gen, + level - 1, &first_key); if (IS_ERR(eb)) return PTR_ERR(eb); if (!extent_buffer_uptodate(eb)) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e61e1ee9af9a..4874c09f6d3c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1839,6 +1839,8 @@ again: parent = eb; while (1) { + struct btrfs_key first_key; + level = btrfs_header_level(parent); BUG_ON(level < lowest_level); @@ -1852,6 +1854,7 @@ again: old_bytenr = btrfs_node_blockptr(parent, slot); blocksize = fs_info->nodesize; old_ptr_gen = btrfs_node_ptr_generation(parent, slot); + btrfs_node_key_to_cpu(parent, &key, slot); if (level <= max_level) { eb = path->nodes[level]; @@ -1876,7 +1879,8 @@ again: break; } - eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen); + eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen, + level - 1, &first_key); if (IS_ERR(eb)) { ret = PTR_ERR(eb); break; @@ -2036,6 +2040,8 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, last_snapshot = btrfs_root_last_snapshot(&root->root_item); for (i = *level; i > 0; i--) { + struct btrfs_key first_key; + eb = path->nodes[i]; nritems = btrfs_header_nritems(eb); while (path->slots[i] < nritems) { @@ -2056,7 +2062,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, } bytenr = btrfs_node_blockptr(eb, path->slots[i]); - eb = read_tree_block(fs_info, bytenr, ptr_gen); + btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]); + eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1, + &first_key); if (IS_ERR(eb)) { return PTR_ERR(eb); } else if (!extent_buffer_uptodate(eb)) { @@ -2714,6 +2722,8 @@ static int do_relocation(struct btrfs_trans_handle *trans, path->lowest_level = node->level + 1; rc->backref_cache.path[node->level] = node; list_for_each_entry(edge, &node->upper, list[LOWER]) { + struct btrfs_key first_key; + cond_resched(); upper = edge->node[UPPER]; @@ -2779,7 +2789,9 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = root->fs_info->nodesize; generation = btrfs_node_ptr_generation(upper->eb, slot); - eb = read_tree_block(fs_info, bytenr, generation); + btrfs_node_key_to_cpu(upper->eb, &first_key, slot); + eb = read_tree_block(fs_info, bytenr, generation, + upper->level - 1, &first_key); if (IS_ERR(eb)) { err = PTR_ERR(eb); goto next; @@ -2944,7 +2956,8 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb; BUG_ON(block->key_ready); - eb = read_tree_block(fs_info, block->bytenr, block->key.offset); + eb = read_tree_block(fs_info, block->bytenr, block->key.offset, + block->level, NULL); if (IS_ERR(eb)) { return PTR_ERR(eb); } else if (!extent_buffer_uptodate(eb)) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 70afd1085033..c91babc6aa4b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -286,7 +286,7 @@ struct walk_control { * inside it */ int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, - struct walk_control *wc, u64 gen); + struct walk_control *wc, u64 gen, int level); }; /* @@ -294,7 +294,7 @@ struct walk_control { */ static int process_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, - struct walk_control *wc, u64 gen) + struct walk_control *wc, u64 gen, int level) { struct btrfs_fs_info *fs_info = log->fs_info; int ret = 0; @@ -304,7 +304,7 @@ static int process_one_buffer(struct btrfs_root *log, * pin down any logged extents, so we have to read the block. */ if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { - ret = btrfs_read_buffer(eb, gen); + ret = btrfs_read_buffer(eb, gen, level, NULL); if (ret) return ret; } @@ -2406,17 +2406,16 @@ out: * back refs). */ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, - struct walk_control *wc, u64 gen) + struct walk_control *wc, u64 gen, int level) { int nritems; struct btrfs_path *path; struct btrfs_root *root = wc->replay_dest; struct btrfs_key key; - int level; int i; int ret; - ret = btrfs_read_buffer(eb, gen); + ret = btrfs_read_buffer(eb, gen, level, NULL); if (ret) return ret; @@ -2533,6 +2532,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); while (*level > 0) { + struct btrfs_key first_key; + WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; @@ -2545,6 +2546,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]); blocksize = fs_info->nodesize; parent = path->nodes[*level]; @@ -2555,7 +2557,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return PTR_ERR(next); if (*level == 1) { - ret = wc->process_func(root, next, wc, ptr_gen); + ret = wc->process_func(root, next, wc, ptr_gen, + *level - 1); if (ret) { free_extent_buffer(next); return ret; @@ -2563,7 +2566,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; if (wc->free) { - ret = btrfs_read_buffer(next, ptr_gen); + ret = btrfs_read_buffer(next, ptr_gen, + *level - 1, &first_key); if (ret) { free_extent_buffer(next); return ret; @@ -2593,7 +2597,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); continue; } - ret = btrfs_read_buffer(next, ptr_gen); + ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key); if (ret) { free_extent_buffer(next); return ret; @@ -2643,7 +2647,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); ret = wc->process_func(root, path->nodes[*level], wc, - btrfs_header_generation(path->nodes[*level])); + btrfs_header_generation(path->nodes[*level]), + *level); if (ret) return ret; @@ -2725,7 +2730,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, /* was the root node processed? if not, catch it here */ if (path->nodes[orig_level]) { ret = wc->process_func(log, path->nodes[orig_level], wc, - btrfs_header_generation(path->nodes[orig_level])); + btrfs_header_generation(path->nodes[orig_level]), + orig_level); if (ret) goto out; if (wc->free) { -- cgit From f50f43539070c6c876bf5435fb23f898f9d81e72 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 29 Mar 2018 06:11:45 +0800 Subject: Btrfs: print error messages when failing to read trees When mount fails to read trees like fs tree, checksum tree, extent tree, etc, there is not enough information about where went wrong. With this, messages like "BTRFS warning (device sdf): failed to read root (objectid=7): -5" would help us a bit. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2f3a0c67a99..07b5e6f7df67 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2383,23 +2383,29 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) location.offset = 0; root = btrfs_read_tree_root(tree_root, &location); - if (IS_ERR(root)) - return PTR_ERR(root); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->extent_root = root; location.objectid = BTRFS_DEV_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); - if (IS_ERR(root)) - return PTR_ERR(root); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; btrfs_init_devices_late(fs_info); location.objectid = BTRFS_CSUM_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); - if (IS_ERR(root)) - return PTR_ERR(root); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->csum_root = root; @@ -2416,7 +2422,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) if (IS_ERR(root)) { ret = PTR_ERR(root); if (ret != -ENOENT) - return ret; + goto out; } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->uuid_root = root; @@ -2425,13 +2431,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); - if (IS_ERR(root)) - return PTR_ERR(root); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->free_space_root = root; } return 0; +out: + btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d", + location.objectid, ret); + return ret; } int open_ctree(struct super_block *sb, @@ -3004,6 +3016,7 @@ retry_root_backup: fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); + btrfs_warn(fs_info, "failed to read fs tree: %d", err); goto fail_qgroup; } -- cgit