diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
| -rw-r--r-- | fs/btrfs/disk-io.c | 146 | 
1 files changed, 94 insertions, 52 deletions
| diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7cda51995c1e..a8f652dc940b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -45,6 +45,7 @@  #include "inode-map.h"  #include "check-integrity.h"  #include "rcu-string.h" +#include "dev-replace.h"  #ifdef CONFIG_X86  #include <asm/cpufeature.h> @@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,  		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))  			break; -		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, +		num_copies = btrfs_num_copies(root->fs_info,  					      eb->start, eb->len);  		if (num_copies == 1)  			break; @@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,  				 int mirror_num, unsigned long bio_flags,  				 u64 bio_offset)  { +	int ret; +  	/*  	 * when we're called for a write, we're already in the async  	 * submission context.  Just jump into btrfs_map_bio  	 */ -	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); +	ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); +	if (ret) +		bio_endio(bio, ret); +	return ret;  }  static int check_async_write(struct inode *inode, unsigned long bio_flags) @@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,  	int ret;  	if (!(rw & REQ_WRITE)) { -  		/*  		 * called for a read, do the setup so that checksum validation  		 * can happen in the async kernel threads @@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,  		ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,  					  bio, 1);  		if (ret) -			return ret; -		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, -				     mirror_num, 0); +			goto out_w_error; +		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, +				    mirror_num, 0);  	} else if (!async) {  		ret = btree_csum_one_bio(bio);  		if (ret) -			return ret; -		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, -				     mirror_num, 0); +			goto out_w_error; +		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, +				    mirror_num, 0); +	} else { +		/* +		 * kthread helpers are used to submit writes so that +		 * checksumming can happen in parallel across all CPUs +		 */ +		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, +					  inode, rw, bio, mirror_num, 0, +					  bio_offset, +					  __btree_submit_bio_start, +					  __btree_submit_bio_done);  	} -	/* -	 * kthread helpers are used to submit writes so that checksumming -	 * can happen in parallel across all CPUs -	 */ -	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, -				   inode, rw, bio, mirror_num, 0, -				   bio_offset, -				   __btree_submit_bio_start, -				   __btree_submit_bio_done); +	if (ret) { +out_w_error: +		bio_endio(bio, ret); +	} +	return ret;  }  #ifdef CONFIG_MIGRATION @@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)  static int btree_set_page_dirty(struct page *page)  { +#ifdef DEBUG  	struct extent_buffer *eb;  	BUG_ON(!PagePrivate(page)); @@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)  	BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));  	BUG_ON(!atomic_read(&eb->refs));  	btrfs_assert_tree_locked(eb); +#endif  	return __set_page_dirty_nobuffers(page);  } @@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,  					  root->fs_info->dirty_metadata_bytes);  			}  			spin_unlock(&root->fs_info->delalloc_lock); -		} -		/* ugh, clear_extent_buffer_dirty needs to lock the page */ -		btrfs_set_lock_blocking(buf); -		clear_extent_buffer_dirty(buf); +			/* ugh, clear_extent_buffer_dirty needs to lock the page */ +			btrfs_set_lock_blocking(buf); +			clear_extent_buffer_dirty(buf); +		}  	}  } @@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,  	root->root_key.objectid = objectid;  	root->anon_dev = 0; -	spin_lock_init(&root->root_times_lock); +	spin_lock_init(&root->root_item_lock);  }  static int __must_check find_and_setup_root(struct btrfs_root *tree_root, @@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,  	init_rwsem(&fs_info->extent_commit_sem);  	init_rwsem(&fs_info->cleanup_work_sem);  	init_rwsem(&fs_info->subvol_sem); +	fs_info->dev_replace.lock_owner = 0; +	atomic_set(&fs_info->dev_replace.nesting_level, 0); +	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); +	mutex_init(&fs_info->dev_replace.lock_management_lock); +	mutex_init(&fs_info->dev_replace.lock);  	spin_lock_init(&fs_info->qgroup_lock);  	fs_info->qgroup_tree = RB_ROOT; @@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,  			   fs_info->thread_pool_size,  			   &fs_info->generic_worker); +	btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", +			   fs_info->thread_pool_size, +			   &fs_info->generic_worker); +  	btrfs_init_workers(&fs_info->submit_workers, "submit",  			   min_t(u64, fs_devices->num_devices,  			   fs_info->thread_pool_size), @@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,  	ret |= btrfs_start_workers(&fs_info->delayed_workers);  	ret |= btrfs_start_workers(&fs_info->caching_workers);  	ret |= btrfs_start_workers(&fs_info->readahead_workers); +	ret |= btrfs_start_workers(&fs_info->flush_workers);  	if (ret) {  		err = -ENOMEM;  		goto fail_sb_buffer; @@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,  		goto fail_tree_roots;  	} -	btrfs_close_extra_devices(fs_devices); +	/* +	 * keep the device that is marked to be the target device for the +	 * dev_replace procedure +	 */ +	btrfs_close_extra_devices(fs_info, fs_devices, 0);  	if (!fs_devices->latest_bdev) {  		printk(KERN_CRIT "btrfs: failed to read devices on %s\n", @@ -2490,6 +2517,14 @@ retry_root_backup:  		goto fail_block_groups;  	} +	ret = btrfs_init_dev_replace(fs_info); +	if (ret) { +		pr_err("btrfs: failed to init dev_replace: %d\n", ret); +		goto fail_block_groups; +	} + +	btrfs_close_extra_devices(fs_info, fs_devices, 1); +  	ret = btrfs_init_space_info(fs_info);  	if (ret) {  		printk(KERN_ERR "Failed to initial space info: %d\n", ret); @@ -2503,6 +2538,13 @@ retry_root_backup:  	}  	fs_info->num_tolerated_disk_barrier_failures =  		btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); +	if (fs_info->fs_devices->missing_devices > +	     fs_info->num_tolerated_disk_barrier_failures && +	    !(sb->s_flags & MS_RDONLY)) { +		printk(KERN_WARNING +		       "Btrfs: too many missing devices, writeable mount is not allowed\n"); +		goto fail_block_groups; +	}  	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,  					       "btrfs-cleaner"); @@ -2631,6 +2673,13 @@ retry_root_backup:  		return ret;  	} +	ret = btrfs_resume_dev_replace_async(fs_info); +	if (ret) { +		pr_warn("btrfs: failed to resume dev_replace\n"); +		close_ctree(tree_root); +		return ret; +	} +  	return 0;  fail_qgroup: @@ -2667,6 +2716,7 @@ fail_sb_buffer:  	btrfs_stop_workers(&fs_info->submit_workers);  	btrfs_stop_workers(&fs_info->delayed_workers);  	btrfs_stop_workers(&fs_info->caching_workers); +	btrfs_stop_workers(&fs_info->flush_workers);  fail_alloc:  fail_iput:  	btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)  	smp_mb();  	/* pause restriper - we want to resume on mount */ -	btrfs_pause_balance(root->fs_info); +	btrfs_pause_balance(fs_info); + +	btrfs_dev_replace_suspend_for_unmount(fs_info); -	btrfs_scrub_cancel(root); +	btrfs_scrub_cancel(fs_info);  	/* wait for any defraggers to finish */  	wait_event(fs_info->transaction_wait,  		   (atomic_read(&fs_info->defrag_running) == 0));  	/* clear out the rbtree of defraggable inodes */ -	btrfs_run_defrag_inodes(fs_info); +	btrfs_cleanup_defrag_inodes(fs_info);  	if (!(fs_info->sb->s_flags & MS_RDONLY)) {  		ret = btrfs_commit_super(root); @@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)  	btrfs_stop_workers(&fs_info->delayed_workers);  	btrfs_stop_workers(&fs_info->caching_workers);  	btrfs_stop_workers(&fs_info->readahead_workers); +	btrfs_stop_workers(&fs_info->flush_workers);  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY  	if (btrfs_test_opt(root, CHECK_INTEGRITY)) @@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)  	int was_dirty;  	btrfs_assert_tree_locked(buf); -	if (transid != root->fs_info->generation) { -		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " +	if (transid != root->fs_info->generation) +		WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "  		       "found %llu running %llu\n",  			(unsigned long long)buf->start,  			(unsigned long long)transid,  			(unsigned long long)root->fs_info->generation); -		WARN_ON(1); -	}  	was_dirty = set_extent_buffer_dirty(buf);  	if (!was_dirty) {  		spin_lock(&root->fs_info->delalloc_lock); @@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)  	}  } -void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) +static void __btrfs_btree_balance_dirty(struct btrfs_root *root, +					int flush_delayed)  {  	/*  	 * looks as though older kernels can get into trouble with @@ -3411,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)  	if (current->flags & PF_MEMALLOC)  		return; -	btrfs_balance_delayed_items(root); +	if (flush_delayed) +		btrfs_balance_delayed_items(root);  	num_dirty = root->fs_info->dirty_metadata_bytes;  	if (num_dirty > thresh) { -		balance_dirty_pages_ratelimited_nr( -				   root->fs_info->btree_inode->i_mapping, 1); +		balance_dirty_pages_ratelimited( +				   root->fs_info->btree_inode->i_mapping);  	}  	return;  } -void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) +void btrfs_btree_balance_dirty(struct btrfs_root *root)  { -	/* -	 * looks as though older kernels can get into trouble with -	 * this code, they end up stuck in balance_dirty_pages forever -	 */ -	u64 num_dirty; -	unsigned long thresh = 32 * 1024 * 1024; - -	if (current->flags & PF_MEMALLOC) -		return; - -	num_dirty = root->fs_info->dirty_metadata_bytes; +	__btrfs_btree_balance_dirty(root, 1); +} -	if (num_dirty > thresh) { -		balance_dirty_pages_ratelimited_nr( -				   root->fs_info->btree_inode->i_mapping, 1); -	} -	return; +void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) +{ +	__btrfs_btree_balance_dirty(root, 0);  }  int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |