diff options
Diffstat (limited to 'fs/btrfs/bio.c')
| -rw-r--r-- | fs/btrfs/bio.c | 211 | 
1 files changed, 123 insertions, 88 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 726592868e9c..5379c4714905 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -31,11 +31,11 @@ struct btrfs_failed_bio {   * Initialize a btrfs_bio structure.  This skips the embedded bio itself as it   * is already initialized by the block layer.   */ -void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, +void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,  		    btrfs_bio_end_io_t end_io, void *private)  {  	memset(bbio, 0, offsetof(struct btrfs_bio, bio)); -	bbio->inode = inode; +	bbio->fs_info = fs_info;  	bbio->end_io = end_io;  	bbio->private = private;  	atomic_set(&bbio->pending_ios, 1); @@ -48,41 +48,58 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode,   * Just like the underlying bio_alloc_bioset it will not fail as it is backed by   * a mempool.   */ -struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, -			    struct btrfs_inode *inode, -			    btrfs_bio_end_io_t end_io, void *private) +struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, +				  struct btrfs_fs_info *fs_info, +				  btrfs_bio_end_io_t end_io, void *private)  { +	struct btrfs_bio *bbio;  	struct bio *bio;  	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset); -	btrfs_bio_init(btrfs_bio(bio), inode, end_io, private); -	return bio; +	bbio = btrfs_bio(bio); +	btrfs_bio_init(bbio, fs_info, end_io, private); +	return bbio;  } -static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, -				   struct bio *orig, u64 map_length, -				   bool use_append) +static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)  { -	struct btrfs_bio *orig_bbio = btrfs_bio(orig); +	struct btrfs_ordered_extent *ordered; +	int ret; + +	ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset); +	if (WARN_ON_ONCE(!ordered)) +		return BLK_STS_IOERR; +	ret = btrfs_extract_ordered_extent(bbio, ordered); +	btrfs_put_ordered_extent(ordered); + +	return errno_to_blk_status(ret); +} + +static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, +					 struct btrfs_bio *orig_bbio, +					 u64 map_length, bool use_append) +{ +	struct btrfs_bio *bbio;  	struct bio *bio;  	if (use_append) {  		unsigned int nr_segs; -		bio = bio_split_rw(orig, &fs_info->limits, &nr_segs, +		bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs,  				   &btrfs_clone_bioset, map_length);  	} else { -		bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS, -				&btrfs_clone_bioset); +		bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, +				GFP_NOFS, &btrfs_clone_bioset);  	} -	btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio); - -	btrfs_bio(bio)->file_offset = orig_bbio->file_offset; -	if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED)) +	bbio = btrfs_bio(bio); +	btrfs_bio_init(bbio, fs_info, NULL, orig_bbio); +	bbio->inode = orig_bbio->inode; +	bbio->file_offset = orig_bbio->file_offset; +	if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))  		orig_bbio->file_offset += map_length;  	atomic_inc(&orig_bbio->pending_ios); -	return bio; +	return bbio;  }  static void btrfs_orig_write_end_io(struct bio *bio); @@ -164,7 +181,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,  			goto done;  		} -		btrfs_submit_bio(&repair_bbio->bio, mirror); +		btrfs_submit_bio(repair_bbio, mirror);  		return;  	} @@ -224,15 +241,16 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,  	repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,  				      &btrfs_repair_bioset);  	repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector; -	bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset); +	__bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);  	repair_bbio = btrfs_bio(repair_bio); -	btrfs_bio_init(repair_bbio, failed_bbio->inode, NULL, fbio); +	btrfs_bio_init(repair_bbio, fs_info, NULL, fbio); +	repair_bbio->inode = failed_bbio->inode;  	repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;  	mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);  	btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror); -	btrfs_submit_bio(repair_bio, mirror); +	btrfs_submit_bio(repair_bbio, mirror);  	return fbio;  } @@ -246,6 +264,9 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de  	struct btrfs_failed_bio *fbio = NULL;  	u32 offset = 0; +	/* Read-repair requires the inode field to be set by the submitter. */ +	ASSERT(inode); +  	/*  	 * Hand off repair bios to the repair code as there is no upper level  	 * submitter for them. @@ -306,17 +327,17 @@ static void btrfs_end_bio_work(struct work_struct *work)  	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);  	/* Metadata reads are checked and repaired by the submitter. */ -	if (bbio->bio.bi_opf & REQ_META) -		bbio->end_io(bbio); -	else +	if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))  		btrfs_check_read_bio(bbio, bbio->bio.bi_private); +	else +		bbio->end_io(bbio);  }  static void btrfs_simple_end_io(struct bio *bio)  {  	struct btrfs_bio *bbio = btrfs_bio(bio);  	struct btrfs_device *dev = bio->bi_private; -	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; +	struct btrfs_fs_info *fs_info = bbio->fs_info;  	btrfs_bio_counter_dec(fs_info); @@ -340,7 +361,8 @@ static void btrfs_raid56_end_io(struct bio *bio)  	btrfs_bio_counter_dec(bioc->fs_info);  	bbio->mirror_num = bioc->mirror_num; -	if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META)) +	if (bio_op(bio) == REQ_OP_READ && bbio->inode && +	    !(bbio->bio.bi_opf & REQ_META))  		btrfs_check_read_bio(bbio, NULL);  	else  		btrfs_orig_bbio_end_io(bbio); @@ -418,7 +440,11 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)  		dev->devid, bio->bi_iter.bi_size);  	btrfsic_check_bio(bio); -	submit_bio(bio); + +	if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT) +		blkcg_punt_bio_submit(bio); +	else +		submit_bio(bio);  }  static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr) @@ -534,10 +560,10 @@ static void run_one_async_done(struct btrfs_work *work)  	/*  	 * All of the bios that pass through here are from async helpers. -	 * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context. -	 * This changes nothing when cgroups aren't in use. +	 * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's +	 * context.  This changes nothing when cgroups aren't in use.  	 */ -	bio->bi_opf |= REQ_CGROUP_PUNT; +	bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;  	__btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);  } @@ -562,7 +588,7 @@ static bool should_async_write(struct btrfs_bio *bbio)  	 * in order.  	 */  	if (bbio->bio.bi_opf & REQ_META) { -		struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; +		struct btrfs_fs_info *fs_info = bbio->fs_info;  		if (btrfs_is_zoned(fs_info))  			return false; @@ -582,7 +608,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,  				struct btrfs_io_context *bioc,  				struct btrfs_io_stripe *smap, int mirror_num)  { -	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; +	struct btrfs_fs_info *fs_info = bbio->fs_info;  	struct async_submit_bio *async;  	async = kmalloc(sizeof(*async), GFP_NOFS); @@ -603,12 +629,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,  	return true;  } -static bool btrfs_submit_chunk(struct bio *bio, int mirror_num) +static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)  { -	struct btrfs_bio *bbio = btrfs_bio(bio);  	struct btrfs_inode *inode = bbio->inode; -	struct btrfs_fs_info *fs_info = inode->root->fs_info; +	struct btrfs_fs_info *fs_info = bbio->fs_info;  	struct btrfs_bio *orig_bbio = bbio; +	struct bio *bio = &bbio->bio;  	u64 logical = bio->bi_iter.bi_sector << 9;  	u64 length = bio->bi_iter.bi_size;  	u64 map_length = length; @@ -631,15 +657,15 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)  		map_length = min(map_length, fs_info->max_zone_append_size);  	if (map_length < length) { -		bio = btrfs_split_bio(fs_info, bio, map_length, use_append); -		bbio = btrfs_bio(bio); +		bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append); +		bio = &bbio->bio;  	}  	/*  	 * Save the iter for the end_io handler and preload the checksums for  	 * data reads.  	 */ -	if (bio_op(bio) == REQ_OP_READ && !(bio->bi_opf & REQ_META)) { +	if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {  		bbio->saved_iter = bio->bi_iter;  		ret = btrfs_lookup_bio_sums(bbio);  		if (ret) @@ -650,7 +676,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)  		if (use_append) {  			bio->bi_opf &= ~REQ_OP_WRITE;  			bio->bi_opf |= REQ_OP_ZONE_APPEND; -			ret = btrfs_extract_ordered_extent(btrfs_bio(bio)); +			ret = btrfs_bio_extract_ordered_extent(bbio);  			if (ret)  				goto fail_put_bio;  		} @@ -659,7 +685,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num)  		 * Csum items for reloc roots have already been cloned at this  		 * point, so they are handled as part of the no-checksum case.  		 */ -		if (!(inode->flags & BTRFS_INODE_NODATASUM) && +		if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&  		    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&  		    !btrfs_is_data_reloc_root(inode->root)) {  			if (should_async_write(bbio) && @@ -686,9 +712,12 @@ fail:  	return true;  } -void btrfs_submit_bio(struct bio *bio, int mirror_num) +void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)  { -	while (!btrfs_submit_chunk(bio, mirror_num)) +	/* If bbio->inode is not populated, its file_offset must be 0. */ +	ASSERT(bbio->inode || bbio->file_offset == 0); + +	while (!btrfs_submit_chunk(bbio, mirror_num))  		;  } @@ -706,12 +735,9 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,  			    u64 length, u64 logical, struct page *page,  			    unsigned int pg_offset, int mirror_num)  { -	struct btrfs_device *dev; +	struct btrfs_io_stripe smap = { 0 };  	struct bio_vec bvec;  	struct bio bio; -	u64 map_length = 0; -	u64 sector; -	struct btrfs_io_context *bioc = NULL;  	int ret = 0;  	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); @@ -720,68 +746,38 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,  	if (btrfs_repair_one_zone(fs_info, logical))  		return 0; -	map_length = length; -  	/*  	 * Avoid races with device replace and make sure our bioc has devices  	 * associated to its stripes that don't go away while we are doing the  	 * read repair operation.  	 */  	btrfs_bio_counter_inc_blocked(fs_info); -	if (btrfs_is_parity_mirror(fs_info, logical, length)) { -		/* -		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed -		 * to update all raid stripes, but here we just want to correct -		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad -		 * stripe's dev and sector. -		 */ -		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, -				      &map_length, &bioc, 0); -		if (ret) -			goto out_counter_dec; -		ASSERT(bioc->mirror_num == 1); -	} else { -		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, -				      &map_length, &bioc, mirror_num); -		if (ret) -			goto out_counter_dec; -		/* -		 * This happens when dev-replace is also running, and the -		 * mirror_num indicates the dev-replace target. -		 * -		 * In this case, we don't need to do anything, as the read -		 * error just means the replace progress hasn't reached our -		 * read range, and later replace routine would handle it well. -		 */ -		if (mirror_num != bioc->mirror_num) -			goto out_counter_dec; -	} - -	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; -	dev = bioc->stripes[bioc->mirror_num - 1].dev; -	btrfs_put_bioc(bioc); +	ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num); +	if (ret < 0) +		goto out_counter_dec; -	if (!dev || !dev->bdev || -	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { +	if (!smap.dev->bdev || +	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {  		ret = -EIO;  		goto out_counter_dec;  	} -	bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC); -	bio.bi_iter.bi_sector = sector; +	bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC); +	bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;  	__bio_add_page(&bio, page, length, pg_offset);  	btrfsic_check_bio(&bio);  	ret = submit_bio_wait(&bio);  	if (ret) {  		/* try to remap that extent elsewhere? */ -		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); +		btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);  		goto out_bio_uninit;  	}  	btrfs_info_rl_in_rcu(fs_info,  		"read error corrected: ino %llu off %llu (dev %s sector %llu)", -			     ino, start, btrfs_dev_name(dev), sector); +			     ino, start, btrfs_dev_name(smap.dev), +			     smap.physical >> SECTOR_SHIFT);  	ret = 0;  out_bio_uninit: @@ -791,6 +787,45 @@ out_counter_dec:  	return ret;  } +/* + * Submit a btrfs_bio based repair write. + * + * If @dev_replace is true, the write would be submitted to dev-replace target. + */ +void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace) +{ +	struct btrfs_fs_info *fs_info = bbio->fs_info; +	u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; +	u64 length = bbio->bio.bi_iter.bi_size; +	struct btrfs_io_stripe smap = { 0 }; +	int ret; + +	ASSERT(fs_info); +	ASSERT(mirror_num > 0); +	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE); +	ASSERT(!bbio->inode); + +	btrfs_bio_counter_inc_blocked(fs_info); +	ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num); +	if (ret < 0) +		goto fail; + +	if (dev_replace) { +		if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) { +			bbio->bio.bi_opf &= ~REQ_OP_WRITE; +			bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND; +		} +		ASSERT(smap.dev == fs_info->dev_replace.srcdev); +		smap.dev = fs_info->dev_replace.tgtdev; +	} +	__btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num); +	return; + +fail: +	btrfs_bio_counter_dec(fs_info); +	btrfs_bio_end_io(bbio, errno_to_blk_status(ret)); +} +  int __init btrfs_bioset_init(void)  {  	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,  |