Diffstat (limited to 'fs/btrfs/scrub.c')

 -rw-r--r--  fs/btrfs/scrub.c | 679
 1 file changed, 7 insertions(+), 672 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6702896cdb8f..3be1456b5116 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,32 +188,6 @@ struct scrub_ctx {
 	refcount_t              refs;
 };
 
-struct scrub_fixup_nodatasum {
-	struct scrub_ctx	*sctx;
-	struct btrfs_device	*dev;
-	u64			logical;
-	struct btrfs_root	*root;
-	struct btrfs_work	work;
-	int			mirror_num;
-};
-
-struct scrub_nocow_inode {
-	u64			inum;
-	u64			offset;
-	u64			root;
-	struct list_head	list;
-};
-
-struct scrub_copy_nocow_ctx {
-	struct scrub_ctx	*sctx;
-	u64			logical;
-	u64			len;
-	int			mirror_num;
-	u64			physical_for_dev_replace;
-	struct list_head	inodes;
-	struct btrfs_work	work;
-};
-
 struct scrub_warning {
 	struct btrfs_path	*path;
 	u64			extent_item_size;
@@ -232,8 +206,6 @@ struct full_stripe_lock {
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
 static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck);
@@ -277,13 +249,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 static void scrub_wr_submit(struct scrub_ctx *sctx);
 static void scrub_wr_bio_end_io(struct bio *bio);
 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page);
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *ctx);
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace);
-static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -555,60 +520,6 @@ out:
 	return ret;
 }
 
-/*
- * used for workers that require transaction commits (i.e., for the
- * NOCOW case)
- */
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
-{
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	refcount_inc(&sctx->refs);
-	/*
-	 * increment scrubs_running to prevent cancel requests from
-	 * completing as long as a worker is running. we must also
-	 * increment scrubs_paused to prevent deadlocking on pause
-	 * requests used for transactions commits (as the worker uses a
-	 * transaction context). it is safe to regard the worker
-	 * as paused for all matters practical. effectively, we only
-	 * avoid cancellation requests from completing.
-	 */
-	mutex_lock(&fs_info->scrub_lock);
-	atomic_inc(&fs_info->scrubs_running);
-	atomic_inc(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-
-	/*
-	 * check if @scrubs_running=@scrubs_paused condition
-	 * inside wait_event() is not an atomic operation.
-	 * which means we may inc/dec @scrub_running/paused
-	 * at any time. Let's wake up @scrub_pause_wait as
-	 * much as we can to let commit transaction blocked less.
-	 */
-	wake_up(&fs_info->scrub_pause_wait);
-
-	atomic_inc(&sctx->workers_pending);
-}
-
-/* used for workers that require transaction commits */
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
-{
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	/*
-	 * see scrub_pending_trans_workers_inc() why we're pretending
-	 * to be paused in the scrub counters
-	 */
-	mutex_lock(&fs_info->scrub_lock);
-	atomic_dec(&fs_info->scrubs_running);
-	atomic_dec(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-	atomic_dec(&sctx->workers_pending);
-	wake_up(&fs_info->scrub_pause_wait);
-	wake_up(&sctx->list_wait);
-	scrub_put_ctx(sctx);
-}
-
 static void scrub_free_csums(struct scrub_ctx *sctx)
 {
 	while (!list_empty(&sctx->csum_list)) {
@@ -882,194 +793,6 @@ out:
 	btrfs_free_path(path);
 }
 
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
-{
-	struct page *page = NULL;
-	unsigned long index;
-	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
-	int ret;
-	int corrected = 0;
-	struct btrfs_key key;
-	struct inode *inode = NULL;
-	struct btrfs_fs_info *fs_info;
-	u64 end = offset + PAGE_SIZE - 1;
-	struct btrfs_root *local_root;
-	int srcu_index;
-
-	key.objectid = root;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = (u64)-1;
-
-	fs_info = fixup->root->fs_info;
-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root)) {
-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-		return PTR_ERR(local_root);
-	}
-
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.objectid = inum;
-	key.offset = 0;
-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	index = offset >> PAGE_SHIFT;
-
-	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
-	if (!page) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (PageUptodate(page)) {
-		if (PageDirty(page)) {
-			/*
-			 * we need to write the data to the defect sector. the
-			 * data that was in that sector is not in memory,
-			 * because the page was modified. we must not write the
-			 * modified page to that sector.
-			 *
-			 * TODO: what could be done here: wait for the delalloc
-			 *       runner to write out that page (might involve
-			 *       COW) and see whether the sector is still
-			 *       referenced afterwards.
-			 *
-			 * For the meantime, we'll treat this error
-			 * incorrectable, although there is a chance that a
-			 * later scrub will find the bad sector again and that
-			 * there's no dirty page in memory, then.
-			 */
-			ret = -EIO;
-			goto out;
-		}
-		ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
-					fixup->logical, page,
-					offset - page_offset(page),
-					fixup->mirror_num);
-		unlock_page(page);
-		corrected = !ret;
-	} else {
-		/*
-		 * we need to get good data first. the general readpage path
-		 * will call repair_io_failure for us, we just have to make
-		 * sure we read the bad mirror.
-		 */
-		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
-					EXTENT_DAMAGED);
-		if (ret) {
-			/* set_extent_bits should give proper error */
-			WARN_ON(ret > 0);
-			if (ret > 0)
-				ret = -EFAULT;
-			goto out;
-		}
-
-		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
-						btrfs_get_extent,
-						fixup->mirror_num);
-		wait_on_page_locked(page);
-
-		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
-						end, EXTENT_DAMAGED, 0, NULL);
-		if (!corrected)
-			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
-						EXTENT_DAMAGED);
-	}
-
-out:
-	if (page)
-		put_page(page);
-
-	iput(inode);
-
-	if (ret < 0)
-		return ret;
-
-	if (ret == 0 && corrected) {
-		/*
-		 * we only need to call readpage for one of the inodes belonging
-		 * to this extent. so make iterate_extent_inodes stop
-		 */
-		return 1;
-	}
-
-	return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
-	struct btrfs_fs_info *fs_info;
-	int ret;
-	struct scrub_fixup_nodatasum *fixup;
-	struct scrub_ctx *sctx;
-	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_path *path;
-	int uncorrectable = 0;
-
-	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
-	sctx = fixup->sctx;
-	fs_info = fixup->root->fs_info;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		spin_lock(&sctx->stat_lock);
-		++sctx->stat.malloc_errors;
-		spin_unlock(&sctx->stat_lock);
-		uncorrectable = 1;
-		goto out;
-	}
-
-	trans = btrfs_join_transaction(fixup->root);
-	if (IS_ERR(trans)) {
-		uncorrectable = 1;
-		goto out;
-	}
-
-	/*
-	 * the idea is to trigger a regular read through the standard path. we
-	 * read a page from the (failed) logical address by specifying the
-	 * corresponding copynum of the failed sector. thus, that readpage is
-	 * expected to fail.
-	 * that is the point where on-the-fly error correction will kick in
-	 * (once it's finished) and rewrite the failed sector if a good copy
-	 * can be found.
-	 */
-	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
-					  scrub_fixup_readpage, fixup, false);
-	if (ret < 0) {
-		uncorrectable = 1;
-		goto out;
-	}
-	WARN_ON(ret != 1);
-
-	spin_lock(&sctx->stat_lock);
-	++sctx->stat.corrected_errors;
-	spin_unlock(&sctx->stat_lock);
-
-out:
-	if (trans && !IS_ERR(trans))
-		btrfs_end_transaction(trans);
-	if (uncorrectable) {
-		spin_lock(&sctx->stat_lock);
-		++sctx->stat.uncorrectable_errors;
-		spin_unlock(&sctx->stat_lock);
-		btrfs_dev_replace_stats_inc(
-			&fs_info->dev_replace.num_uncorrectable_read_errors);
-		btrfs_err_rl_in_rcu(fs_info,
-		    "unable to fixup (nodatasum) error at logical %llu on dev %s",
-			fixup->logical, rcu_str_deref(fixup->dev->name));
-	}
-
-	btrfs_free_path(path);
-	kfree(fixup);
-
-	scrub_pending_trans_workers_dec(sctx);
-}
-
 static inline void scrub_get_recover(struct scrub_recover *recover)
 {
 	refcount_inc(&recover->refs);
@@ -1264,42 +987,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	}
 
 	/*
-	 * NOTE: Even for nodatasum case, it's still possible that it's a
-	 * compressed data extent, thus scrub_fixup_nodatasum(), which write
-	 * inode page cache onto disk, could cause serious data corruption.
-	 *
-	 * So here we could only read from disk, and hope our recovery could
-	 * reach disk before the newer write.
-	 */
-	if (0 && !is_metadata && !have_csum) {
-		struct scrub_fixup_nodatasum *fixup_nodatasum;
-
-		WARN_ON(sctx->is_dev_replace);
-
-		/*
-		 * !is_metadata and !have_csum, this means that the data
-		 * might not be COWed, that it might be modified
-		 * concurrently. The general strategy to work on the
-		 * commit root does not help in the case when COW is not
-		 * used.
-		 */
-		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
-		if (!fixup_nodatasum)
-			goto did_not_correct_error;
-		fixup_nodatasum->sctx = sctx;
-		fixup_nodatasum->dev = dev;
-		fixup_nodatasum->logical = logical;
-		fixup_nodatasum->root = fs_info->extent_root;
-		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
-		scrub_pending_trans_workers_inc(sctx);
-		btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
-				scrub_fixup_nodatasum, NULL, NULL);
-		btrfs_queue_work(fs_info->scrub_workers,
-				 &fixup_nodatasum->work);
-		goto out;
-	}
-
-	/*
 	 * now build and submit the bios for the other mirrors, check
 	 * checksums.
 	 * First try to pick the mirror which is completely without I/O
@@ -1866,7 +1553,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 		bio = btrfs_io_bio_alloc(1);
 		bio_set_dev(bio, page_bad->dev->bdev);
 		bio->bi_iter.bi_sector = page_bad->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 
 		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
 		if (PAGE_SIZE != ret) {
@@ -1961,7 +1648,7 @@ again:
 		bio->bi_end_io = scrub_wr_bio_end_io;
 		bio_set_dev(bio, sbio->dev->bdev);
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
@@ -2361,7 +2048,7 @@ again:
 		bio->bi_end_io = scrub_bio_end_io;
 		bio_set_dev(bio, sbio->dev->bdev);
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
@@ -2800,17 +2487,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 			have_csum = scrub_find_csum(sctx, logical, csum);
 			if (have_csum == 0)
 				++sctx->stat.no_csum;
-			if (0 && sctx->is_dev_replace && !have_csum) {
-				ret = copy_nocow_pages(sctx, logical, l,
-						       mirror_num,
-						      physical_for_dev_replace);
-				goto behind_scrub_pages;
-			}
 		}
 		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
 				  mirror_num, have_csum ? csum : NULL, 0,
 				  physical_for_dev_replace);
-behind_scrub_pages:
 		if (ret)
 			return ret;
 		len -= l;
@@ -3863,7 +3543,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		 * -> btrfs_scrub_pause()
 		 */
 		scrub_pause_on(fs_info);
-		ret = btrfs_inc_block_group_ro(fs_info, cache);
+		ret = btrfs_inc_block_group_ro(cache);
 		if (!ret && is_dev_replace) {
 			/*
 			 * If we are doing a device replace wait for any tasks
@@ -3982,14 +3662,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
 		    btrfs_block_group_used(&cache->item) == 0) {
 			spin_unlock(&cache->lock);
-			spin_lock(&fs_info->unused_bgs_lock);
-			if (list_empty(&cache->bg_list)) {
-				btrfs_get_block_group(cache);
-				trace_btrfs_add_unused_block_group(cache);
-				list_add_tail(&cache->bg_list,
-					      &fs_info->unused_bgs);
-			}
-			spin_unlock(&fs_info->unused_bgs_lock);
+			btrfs_mark_bg_unused(cache);
 		} else {
 			spin_unlock(&cache->lock);
 		}
@@ -4072,10 +3745,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 		if (!fs_info->scrub_wr_completion_workers)
 			goto fail_scrub_wr_completion_workers;
 
-		fs_info->scrub_nocow_workers =
-			btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
-		if (!fs_info->scrub_nocow_workers)
-			goto fail_scrub_nocow_workers;
 		fs_info->scrub_parity_workers =
			btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
 					      max_active, 2);
@@ -4086,8 +3755,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 	return 0;
 
 fail_scrub_parity_workers:
-	btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
-fail_scrub_nocow_workers:
 	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
 fail_scrub_wr_completion_workers:
 	btrfs_destroy_workqueue(fs_info->scrub_workers);
@@ -4100,7 +3767,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
 	if (--fs_info->scrub_workers_refcnt == 0) {
 		btrfs_destroy_workqueue(fs_info->scrub_workers);
 		btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
-		btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
 		btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
 	}
 	WARN_ON(fs_info->scrub_workers_refcnt < 0);
@@ -4113,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	struct scrub_ctx *sctx;
 	int ret;
 	struct btrfs_device *dev;
-	struct rcu_string *name;
 
 	if (btrfs_fs_closing(fs_info))
 		return -EINVAL;
@@ -4167,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	if (!is_dev_replace && !readonly &&
 	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-		rcu_read_lock();
-		name = rcu_dereference(dev->name);
-		btrfs_err(fs_info, "scrub: device %s is not writable",
-			  name->str);
-		rcu_read_unlock();
+		btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+				rcu_str_deref(dev->name));
 		return -EROFS;
 	}
 
@@ -4359,330 +4021,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 	*extent_dev = bbio->stripes[0].dev;
 	btrfs_put_bbio(bbio);
 }
-
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
-	if (!nocow_ctx) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		return -ENOMEM;
-	}
-
-	scrub_pending_trans_workers_inc(sctx);
-
-	nocow_ctx->sctx = sctx;
-	nocow_ctx->logical = logical;
-	nocow_ctx->len = len;
-	nocow_ctx->mirror_num = mirror_num;
-	nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
-	btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
-			copy_nocow_pages_worker, NULL, NULL);
-	INIT_LIST_HEAD(&nocow_ctx->inodes);
-	btrfs_queue_work(fs_info->scrub_nocow_workers,
-			 &nocow_ctx->work);
-
-	return 0;
-}
-
-static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
-	struct scrub_nocow_inode *nocow_inode;
-
-	nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
-	if (!nocow_inode)
-		return -ENOMEM;
-	nocow_inode->inum = inum;
-	nocow_inode->offset = offset;
-	nocow_inode->root = root;
-	list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
-	return 0;
-}
-
-#define COPY_COMPLETE 1
-
-static void copy_nocow_pages_worker(struct btrfs_work *work)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx =
-		container_of(work, struct scrub_copy_nocow_ctx, work);
-	struct scrub_ctx *sctx = nocow_ctx->sctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	struct btrfs_root *root = fs_info->extent_root;
-	u64 logical = nocow_ctx->logical;
-	u64 len = nocow_ctx->len;
-	int mirror_num = nocow_ctx->mirror_num;
-	u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	int ret;
-	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_path *path;
-	int not_written = 0;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		not_written = 1;
-		goto out;
-	}
-
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans)) {
-		not_written = 1;
-		goto out;
-	}
-
-	ret = iterate_inodes_from_logical(logical, fs_info, path,
-			record_inode_for_nocow, nocow_ctx, false);
-	if (ret != 0 && ret != -ENOENT) {
-		btrfs_warn(fs_info,
-			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
-			   logical, physical_for_dev_replace, len, mirror_num,
-			   ret);
-		not_written = 1;
-		goto out;
-	}
-
-	btrfs_end_transaction(trans);
-	trans = NULL;
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
-						 entry->root, nocow_ctx);
-		kfree(entry);
-		if (ret == COPY_COMPLETE) {
-			ret = 0;
-			break;
-		} else if (ret) {
-			break;
-		}
-	}
-out:
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		kfree(entry);
-	}
-	if (trans && !IS_ERR(trans))
-		btrfs_end_transaction(trans);
-	if (not_written)
-		btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
-					    num_uncorrectable_read_errors);
-
-	btrfs_free_path(path);
-	kfree(nocow_ctx);
-
-	scrub_pending_trans_workers_dec(sctx);
-}
-
-static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
-				 u64 logical)
-{
-	struct extent_state *cached_state = NULL;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_io_tree *io_tree;
-	struct extent_map *em;
-	u64 lockstart = start, lockend = start + len - 1;
-	int ret = 0;
-
-	io_tree = &inode->io_tree;
-
-	lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		ret = 1;
-		goto out_unlock;
-	}
-
-	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > logical ||
-	    em->block_start + em->block_len < logical + len ||
-	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
-		free_extent_map(em);
-		ret = 1;
-		goto out_unlock;
-	}
-	free_extent_map(em);
-
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
-	return ret;
-}
-
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *nocow_ctx)
-{
-	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
-	struct btrfs_key key;
-	struct inode *inode;
-	struct page *page;
-	struct btrfs_root *local_root;
-	struct extent_io_tree *io_tree;
-	u64 physical_for_dev_replace;
-	u64 nocow_ctx_logical;
-	u64 len = nocow_ctx->len;
-	unsigned long index;
-	int srcu_index;
-	int ret = 0;
-	int err = 0;
-
-	key.objectid = root;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = (u64)-1;
-
-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root)) {
-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-		return PTR_ERR(local_root);
-	}
-
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.objectid = inum;
-	key.offset = 0;
-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	/* Avoid truncate/dio/punch hole.. */
-	inode_lock(inode);
-	inode_dio_wait(inode);
-
-	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	io_tree = &BTRFS_I(inode)->io_tree;
-	nocow_ctx_logical = nocow_ctx->logical;
-
-	ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-			nocow_ctx_logical);
-	if (ret) {
-		ret = ret > 0 ? 0 : ret;
-		goto out;
-	}
-
-	while (len >= PAGE_SIZE) {
-		index = offset >> PAGE_SHIFT;
-again:
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
-		if (!page) {
-			btrfs_err(fs_info, "find_or_create_page() failed");
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		if (PageUptodate(page)) {
-			if (PageDirty(page))
-				goto next_page;
-		} else {
-			ClearPageError(page);
-			err = extent_read_full_page(io_tree, page,
-							   btrfs_get_extent,
-							   nocow_ctx->mirror_num);
-			if (err) {
-				ret = err;
-				goto next_page;
-			}
-
-			lock_page(page);
-			/*
-			 * If the page has been remove from the page cache,
-			 * the data on it is meaningless, because it may be
-			 * old one, the new data may be written into the new
-			 * page in the page cache.
-			 */
-			if (page->mapping != inode->i_mapping) {
-				unlock_page(page);
-				put_page(page);
-				goto again;
-			}
-			if (!PageUptodate(page)) {
-				ret = -EIO;
-				goto next_page;
-			}
-		}
-
-		ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-					    nocow_ctx_logical);
-		if (ret) {
-			ret = ret > 0 ? 0 : ret;
-			goto next_page;
-		}
-
-		err = write_page_nocow(nocow_ctx->sctx,
-				       physical_for_dev_replace, page);
-		if (err)
-			ret = err;
-next_page:
-		unlock_page(page);
-		put_page(page);
-
-		if (ret)
-			break;
-
-		offset += PAGE_SIZE;
-		physical_for_dev_replace += PAGE_SIZE;
-		nocow_ctx_logical += PAGE_SIZE;
-		len -= PAGE_SIZE;
-	}
-	ret = COPY_COMPLETE;
-out:
-	inode_unlock(inode);
-	iput(inode);
-	return ret;
-}
-
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page)
-{
-	struct bio *bio;
-	struct btrfs_device *dev;
-
-	dev = sctx->wr_tgtdev;
-	if (!dev)
-		return -EIO;
-	if (!dev->bdev) {
-		btrfs_warn_rl(dev->fs_info,
-			"scrub write_page_nocow(bdev == NULL) is unexpected");
-		return -EIO;
-	}
-	bio = btrfs_io_bio_alloc(1);
-	bio->bi_iter.bi_size = 0;
-	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
-	bio_set_dev(bio, dev->bdev);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-	/* bio_add_page won't fail on a freshly allocated bio */
-	bio_add_page(bio, page, PAGE_SIZE, 0);
-
-	if (btrfsic_submit_bio_wait(bio)) {
-		bio_put(bio);
-		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-		return -EIO;
-	}
-
-	bio_put(bio);
-	return 0;
-}
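
A few notes on the small replacement hunks above, with illustrative sketches. The three bio_set_op_attrs() conversions are behavior-preserving: the helper (since removed from the block layer) was a thin wrapper that ORed the operation and its flags into bi_opf, so with a flags argument of 0 the direct assignment is identical. A minimal sketch of the equivalence; the helper body is paraphrased from include/linux/blk_types.h as it existed at the time and should be read as illustrative, not authoritative:

	/* former block-layer helper, roughly: */
	static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
					    unsigned op_flags)
	{
		bio->bi_opf = op | op_flags;
	}

	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);	/* old form in scrub.c */
	bio->bi_opf = REQ_OP_WRITE;		/* new form, same result */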
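The unused-block-group hunk in scrub_enumerate_chunks() is a consolidation rather than a behavior change: the open-coded lock/list sequence moves behind btrfs_mark_bg_unused(). Reconstructed from the deleted lines, the helper would look roughly like the sketch below (the body is a reconstruction from this diff, not the authoritative definition):

	void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
	{
		struct btrfs_fs_info *fs_info = bg->fs_info;

		spin_lock(&fs_info->unused_bgs_lock);
		if (list_empty(&bg->bg_list)) {
			/* hold a block group reference while on unused_bgs */
			btrfs_get_block_group(bg);
			trace_btrfs_add_unused_block_group(bg);
			list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
		}
		spin_unlock(&fs_info->unused_bgs_lock);
	}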
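Similarly, the "device is not writable" hunk folds a four-line RCU read-side section into a single call. Both forms below are taken from the diff itself; the assumption, stated in the comment, is that btrfs_err_in_rcu() opens its own RCU read-side critical section around the message, which is what makes rcu_str_deref(dev->name) safe as an argument:

	/* open-coded form removed by the diff: */
	rcu_read_lock();
	name = rcu_dereference(dev->name);
	btrfs_err(fs_info, "scrub: device %s is not writable", name->str);
	rcu_read_unlock();

	/*
	 * Replacement: the helper is assumed to wrap the printk in
	 * rcu_read_lock()/rcu_read_unlock(), so the rcu_str_deref()
	 * dereference happens inside the critical section.
	 */
	btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
			 rcu_str_deref(dev->name));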