diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 216 |
1 files changed, 121 insertions, 95 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b53f0e30ce2b..dabc79c1af1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -96,7 +96,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, first_page_part - BTRFS_CSUM_SIZE); - for (i = 1; i < num_pages; i++) { + for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = page_address(buf->pages[i]); crypto_shash_update(shash, kaddr, PAGE_SIZE); } @@ -242,7 +242,6 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; - u64 start = eb->start; int i, num_pages = num_extent_pages(eb); int ret = 0; @@ -251,12 +250,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; + u64 start = max_t(u64, eb->start, page_offset(p)); + u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE); + u32 len = end - start; - ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, - start, p, start - page_offset(p), mirror_num); + ret = btrfs_repair_io_failure(fs_info, 0, start, len, + start, p, offset_in_page(start), mirror_num); if (ret) break; - start += PAGE_SIZE; } return ret; @@ -995,13 +996,18 @@ int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; + int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); - ASSERT(!tmp); - return tmp ? -EEXIST : 0; + if (tmp) { + ret = -EEXIST; + btrfs_warn(fs_info, "global root %llu %llu already exists", + root->root_key.objectid, root->root_key.offset); + } + return ret; } void btrfs_global_root_delete(struct btrfs_root *root) @@ -1341,17 +1347,8 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) { int ret; - unsigned int nofs_flag; - /* - * We might be called under a transaction (e.g. indirect backref - * resolution) which could deadlock if it triggers memory reclaim - */ - nofs_flag = memalloc_nofs_save(); - ret = btrfs_drew_lock_init(&root->snapshot_lock); - memalloc_nofs_restore(nofs_flag); - if (ret) - goto fail; + btrfs_drew_lock_init(&root->snapshot_lock); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root)) { @@ -2065,7 +2062,6 @@ void btrfs_put_root(struct btrfs_root *root) WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); - btrfs_drew_lock_destroy(&root->snapshot_lock); free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG spin_lock(&root->fs_info->fs_roots_radix_lock); @@ -2125,11 +2121,16 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->reloc_cancel_req, 0); } -static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) +static int btrfs_init_btree_inode(struct super_block *sb) { - struct inode *inode = fs_info->btree_inode; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID, fs_info->tree_root); + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return -ENOMEM; inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; set_nlink(inode, 1); @@ -2140,6 +2141,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) */ inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, @@ -2152,6 +2154,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) BTRFS_I(inode)->location.offset = 0; set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); __insert_inode_hash(inode, hash); + fs_info->btree_inode = inode; + + return 0; } static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) @@ -2250,6 +2255,20 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) fs_info->csum_shash = csum_shash; + /* + * Check if the checksum implementation is a fast accelerated one. + * As-is this is a bit of a hack and should be replaced once the csum + * implementations provide that information themselves. + */ + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); + break; + default: + break; + } + btrfs_info(fs_info, "using %s (%s) checksum algorithm", btrfs_super_csum_name(csum_type), crypto_shash_driver_name(csum_shash)); @@ -2828,6 +2847,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) /* We can't trust the free space cache either */ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) @@ -2952,7 +2972,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) atomic64_set(&fs_info->free_chunk_space, 0); fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; - fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ btrfs_init_ref_verify(fs_info); fs_info->thread_pool_size = min_t(unsigned long, @@ -3109,23 +3128,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) { int ret; const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); - bool clear_free_space_tree = false; + bool rebuild_free_space_tree = false; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { - clear_free_space_tree = true; + rebuild_free_space_tree = true; } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { btrfs_warn(fs_info, "free space tree is invalid"); - clear_free_space_tree = true; + rebuild_free_space_tree = true; } - if (clear_free_space_tree) { - btrfs_info(fs_info, "clearing free space tree"); - ret = btrfs_clear_free_space_tree(fs_info); + if (rebuild_free_space_tree) { + btrfs_info(fs_info, "rebuilding free space tree"); + ret = btrfs_rebuild_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, - "failed to clear free space tree: %d", ret); + "failed to rebuild free space tree: %d", ret); + goto out; + } + } + + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { + btrfs_info(fs_info, "disabling free space tree"); + ret = btrfs_delete_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to disable free space tree: %d", ret); goto out; } } @@ -3330,14 +3360,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device struct btrfs_root *tree_root; struct btrfs_root *chunk_root; int ret; - int err = -EINVAL; int level; ret = init_mount_fs_info(fs_info, sb); - if (ret) { - err = ret; + if (ret) goto fail; - } /* These need to be init'ed before we start creating inodes and such. */ tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, @@ -3347,17 +3374,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device GFP_KERNEL); fs_info->chunk_root = chunk_root; if (!tree_root || !chunk_root) { - err = -ENOMEM; + ret = -ENOMEM; goto fail; } - fs_info->btree_inode = new_inode(sb); - if (!fs_info->btree_inode) { - err = -ENOMEM; + ret = btrfs_init_btree_inode(sb); + if (ret) goto fail; - } - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - btrfs_init_btree_inode(fs_info); invalidate_bdev(fs_devices->latest_dev->bdev); @@ -3366,7 +3389,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev); if (IS_ERR(disk_super)) { - err = PTR_ERR(disk_super); + ret = PTR_ERR(disk_super); goto fail_alloc; } @@ -3378,7 +3401,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (!btrfs_supported_super_csum(csum_type)) { btrfs_err(fs_info, "unsupported checksum algorithm: %u", csum_type); - err = -EINVAL; + ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3387,7 +3410,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_init_csum_hash(fs_info, csum_type); if (ret) { - err = ret; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3398,7 +3420,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ if (btrfs_check_super_csum(fs_info, disk_super)) { btrfs_err(fs_info, "superblock checksum mismatch"); - err = -EINVAL; + ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } @@ -3428,12 +3450,15 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_validate_mount_super(fs_info); if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); - err = -EINVAL; + ret = -EINVAL; goto fail_alloc; } - if (!btrfs_super_root(disk_super)) + if (!btrfs_super_root(disk_super)) { + btrfs_err(fs_info, "invalid superblock tree root bytenr"); + ret = -EINVAL; goto fail_alloc; + } /* check FS state, whether FS is broken. */ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) @@ -3460,16 +3485,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->stripesize = stripesize; ret = btrfs_parse_options(fs_info, options, sb->s_flags); - if (ret) { - err = ret; + if (ret) goto fail_alloc; - } ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); - if (ret < 0) { - err = ret; + if (ret < 0) goto fail_alloc; - } if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info; @@ -3489,17 +3510,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL); - if (!subpage_info) + if (!subpage_info) { + ret = -ENOMEM; goto fail_alloc; + } btrfs_init_subpage_info(subpage_info, sectorsize); fs_info->subpage_info = subpage_info; } ret = btrfs_init_workqueues(fs_info); - if (ret) { - err = ret; + if (ret) goto fail_sb_buffer; - } sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); @@ -3545,6 +3566,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_free_extra_devids(fs_devices); if (!fs_devices->latest_dev->bdev) { btrfs_err(fs_info, "failed to read devices"); + ret = -EIO; goto fail_tree_roots; } @@ -3560,8 +3582,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ret = btrfs_get_dev_zone_info_all_devices(fs_info); if (ret) { btrfs_err(fs_info, - "zoned: failed to read device zone info: %d", - ret); + "zoned: failed to read device zone info: %d", ret); goto fail_block_groups; } @@ -3640,19 +3661,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); + ret = -EINVAL; goto fail_sysfs; } fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info, "btrfs-cleaner"); - if (IS_ERR(fs_info->cleaner_kthread)) + if (IS_ERR(fs_info->cleaner_kthread)) { + ret = PTR_ERR(fs_info->cleaner_kthread); goto fail_sysfs; + } fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (IS_ERR(fs_info->transaction_kthread)) + if (IS_ERR(fs_info->transaction_kthread)) { + ret = PTR_ERR(fs_info->transaction_kthread); goto fail_cleaner; + } if (!btrfs_test_opt(fs_info, NOSSD) && !fs_info->fs_devices->rotating) { @@ -3670,7 +3696,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device fs_info->fs_devices->discardable) { btrfs_set_and_info(fs_info, DISCARD_ASYNC, "auto enabling async discard"); - btrfs_clear_opt(fs_info->mount_opt, NODISCARD); } #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY @@ -3697,16 +3722,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); - if (ret) { - err = ret; + if (ret) goto fail_qgroup; - } } fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { - err = PTR_ERR(fs_info->fs_root); - btrfs_warn(fs_info, "failed to read fs tree: %d", err); + ret = PTR_ERR(fs_info->fs_root); + btrfs_warn(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } @@ -3783,7 +3806,8 @@ fail_alloc: iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); - return err; + ASSERT(ret < 0); + return ret; } ALLOW_ERROR_INJECTION(open_ctree, ERRNO); @@ -4080,6 +4104,8 @@ static void write_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; + device->last_flush_error = BLK_STS_OK; + #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* * When a disk has write caching disabled, we skip submission of a bio @@ -4108,25 +4134,24 @@ static void write_dev_flush(struct btrfs_device *device) /* * If the flush bio has been submitted by write_dev_flush, wait for it. + * Return true for any error, and false otherwise. */ -static blk_status_t wait_dev_flush(struct btrfs_device *device) +static bool wait_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; - if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) - return BLK_STS_OK; + if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) + return false; - clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); wait_for_completion_io(&device->flush_wait); - return bio->bi_status; -} + if (bio->bi_status) { + device->last_flush_error = bio->bi_status; + btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); + return true; + } -static int check_barrier_error(struct btrfs_fs_info *fs_info) -{ - if (!btrfs_check_rw_degradable(fs_info, NULL)) - return -EIO; - return 0; + return false; } /* @@ -4138,7 +4163,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info) struct list_head *head; struct btrfs_device *dev; int errors_wait = 0; - blk_status_t ret; lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ @@ -4153,7 +4177,6 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ @@ -4168,23 +4191,17 @@ static int barrier_all_devices(struct btrfs_fs_info *info) !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; - ret = wait_dev_flush(dev); - if (ret) { - dev->last_flush_error = ret; - btrfs_dev_stat_inc_and_print(dev, - BTRFS_DEV_STAT_FLUSH_ERRS); + if (wait_dev_flush(dev)) errors_wait++; - } } - if (errors_wait) { - /* - * At some point we need the status of all disks - * to arrive at the volume status. So error checking - * is being pushed to a separate loop. - */ - return check_barrier_error(info); - } + /* + * Checks last_flush_error of disks in order to determine the device + * state. + */ + if (errors_wait && !btrfs_check_rw_degradable(info, NULL)) + return -EIO; + return 0; } @@ -4390,12 +4407,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[i]->root_key.objectid; err = btrfs_orphan_cleanup(gang[i]); if (err) - break; + goto out; btrfs_put_root(gang[i]); } root_objectid++; } - +out: /* release the uncleaned roots due to error */ for (; i < ret; i++) { if (gang[i]) @@ -4926,7 +4943,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) */ inode = igrab(&btrfs_inode->vfs_inode); if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); iput(inode); } spin_lock(&root->delalloc_lock); @@ -5032,7 +5053,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache) inode = cache->io_ctl.inode; if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); + BTRFS_I(inode)->generation = 0; cache->io_ctl.inode = NULL; iput(inode); |