diff options
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r-- | fs/btrfs/bio.c | 211 |
1 files changed, 123 insertions, 88 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 726592868e9c..5379c4714905 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -31,11 +31,11 @@ struct btrfs_failed_bio { * Initialize a btrfs_bio structure. This skips the embedded bio itself as it * is already initialized by the block layer. */ -void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, +void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info, btrfs_bio_end_io_t end_io, void *private) { memset(bbio, 0, offsetof(struct btrfs_bio, bio)); - bbio->inode = inode; + bbio->fs_info = fs_info; bbio->end_io = end_io; bbio->private = private; atomic_set(&bbio->pending_ios, 1); @@ -48,41 +48,58 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, * Just like the underlying bio_alloc_bioset it will not fail as it is backed by * a mempool. */ -struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, - struct btrfs_inode *inode, - btrfs_bio_end_io_t end_io, void *private) +struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, + struct btrfs_fs_info *fs_info, + btrfs_bio_end_io_t end_io, void *private) { + struct btrfs_bio *bbio; struct bio *bio; bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset); - btrfs_bio_init(btrfs_bio(bio), inode, end_io, private); - return bio; + bbio = btrfs_bio(bio); + btrfs_bio_init(bbio, fs_info, end_io, private); + return bbio; } -static struct bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, - struct bio *orig, u64 map_length, - bool use_append) +static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio) { - struct btrfs_bio *orig_bbio = btrfs_bio(orig); + struct btrfs_ordered_extent *ordered; + int ret; + + ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset); + if (WARN_ON_ONCE(!ordered)) + return BLK_STS_IOERR; + ret = btrfs_extract_ordered_extent(bbio, ordered); + btrfs_put_ordered_extent(ordered); + + return errno_to_blk_status(ret); +} + +static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, + struct btrfs_bio *orig_bbio, + u64 map_length, bool use_append) +{ + struct btrfs_bio *bbio; struct bio *bio; if (use_append) { unsigned int nr_segs; - bio = bio_split_rw(orig, &fs_info->limits, &nr_segs, + bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs, &btrfs_clone_bioset, map_length); } else { - bio = bio_split(orig, map_length >> SECTOR_SHIFT, GFP_NOFS, - &btrfs_clone_bioset); + bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, + GFP_NOFS, &btrfs_clone_bioset); } - btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode, NULL, orig_bbio); - - btrfs_bio(bio)->file_offset = orig_bbio->file_offset; - if (!(orig->bi_opf & REQ_BTRFS_ONE_ORDERED)) + bbio = btrfs_bio(bio); + btrfs_bio_init(bbio, fs_info, NULL, orig_bbio); + bbio->inode = orig_bbio->inode; + bbio->file_offset = orig_bbio->file_offset; + if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED)) orig_bbio->file_offset += map_length; atomic_inc(&orig_bbio->pending_ios); - return bio; + return bbio; } static void btrfs_orig_write_end_io(struct bio *bio); @@ -164,7 +181,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, goto done; } - btrfs_submit_bio(&repair_bbio->bio, mirror); + btrfs_submit_bio(repair_bbio, mirror); return; } @@ -224,15 +241,16 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio, repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &btrfs_repair_bioset); repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector; - bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset); + __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset); repair_bbio = btrfs_bio(repair_bio); - btrfs_bio_init(repair_bbio, failed_bbio->inode, NULL, fbio); + btrfs_bio_init(repair_bbio, fs_info, NULL, fbio); + repair_bbio->inode = failed_bbio->inode; repair_bbio->file_offset = failed_bbio->file_offset + bio_offset; mirror = next_repair_mirror(fbio, failed_bbio->mirror_num); btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror); - btrfs_submit_bio(repair_bio, mirror); + btrfs_submit_bio(repair_bbio, mirror); return fbio; } @@ -246,6 +264,9 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de struct btrfs_failed_bio *fbio = NULL; u32 offset = 0; + /* Read-repair requires the inode field to be set by the submitter. */ + ASSERT(inode); + /* * Hand off repair bios to the repair code as there is no upper level * submitter for them. @@ -306,17 +327,17 @@ static void btrfs_end_bio_work(struct work_struct *work) struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work); /* Metadata reads are checked and repaired by the submitter. */ - if (bbio->bio.bi_opf & REQ_META) - bbio->end_io(bbio); - else + if (bbio->inode && !(bbio->bio.bi_opf & REQ_META)) btrfs_check_read_bio(bbio, bbio->bio.bi_private); + else + bbio->end_io(bbio); } static void btrfs_simple_end_io(struct bio *bio) { struct btrfs_bio *bbio = btrfs_bio(bio); struct btrfs_device *dev = bio->bi_private; - struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; + struct btrfs_fs_info *fs_info = bbio->fs_info; btrfs_bio_counter_dec(fs_info); @@ -340,7 +361,8 @@ static void btrfs_raid56_end_io(struct bio *bio) btrfs_bio_counter_dec(bioc->fs_info); bbio->mirror_num = bioc->mirror_num; - if (bio_op(bio) == REQ_OP_READ && !(bbio->bio.bi_opf & REQ_META)) + if (bio_op(bio) == REQ_OP_READ && bbio->inode && + !(bbio->bio.bi_opf & REQ_META)) btrfs_check_read_bio(bbio, NULL); else btrfs_orig_bbio_end_io(bbio); @@ -418,7 +440,11 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio) dev->devid, bio->bi_iter.bi_size); btrfsic_check_bio(bio); - submit_bio(bio); + + if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT) + blkcg_punt_bio_submit(bio); + else + submit_bio(bio); } static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr) @@ -534,10 +560,10 @@ static void run_one_async_done(struct btrfs_work *work) /* * All of the bios that pass through here are from async helpers. - * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context. - * This changes nothing when cgroups aren't in use. + * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's + * context. This changes nothing when cgroups aren't in use. */ - bio->bi_opf |= REQ_CGROUP_PUNT; + bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT; __btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num); } @@ -562,7 +588,7 @@ static bool should_async_write(struct btrfs_bio *bbio) * in order. */ if (bbio->bio.bi_opf & REQ_META) { - struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; + struct btrfs_fs_info *fs_info = bbio->fs_info; if (btrfs_is_zoned(fs_info)) return false; @@ -582,7 +608,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio, struct btrfs_io_context *bioc, struct btrfs_io_stripe *smap, int mirror_num) { - struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; + struct btrfs_fs_info *fs_info = bbio->fs_info; struct async_submit_bio *async; async = kmalloc(sizeof(*async), GFP_NOFS); @@ -603,12 +629,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio, return true; } -static bool btrfs_submit_chunk(struct bio *bio, int mirror_num) +static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) { - struct btrfs_bio *bbio = btrfs_bio(bio); struct btrfs_inode *inode = bbio->inode; - struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_fs_info *fs_info = bbio->fs_info; struct btrfs_bio *orig_bbio = bbio; + struct bio *bio = &bbio->bio; u64 logical = bio->bi_iter.bi_sector << 9; u64 length = bio->bi_iter.bi_size; u64 map_length = length; @@ -631,15 +657,15 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num) map_length = min(map_length, fs_info->max_zone_append_size); if (map_length < length) { - bio = btrfs_split_bio(fs_info, bio, map_length, use_append); - bbio = btrfs_bio(bio); + bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append); + bio = &bbio->bio; } /* * Save the iter for the end_io handler and preload the checksums for * data reads. */ - if (bio_op(bio) == REQ_OP_READ && !(bio->bi_opf & REQ_META)) { + if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) { bbio->saved_iter = bio->bi_iter; ret = btrfs_lookup_bio_sums(bbio); if (ret) @@ -650,7 +676,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num) if (use_append) { bio->bi_opf &= ~REQ_OP_WRITE; bio->bi_opf |= REQ_OP_ZONE_APPEND; - ret = btrfs_extract_ordered_extent(btrfs_bio(bio)); + ret = btrfs_bio_extract_ordered_extent(bbio); if (ret) goto fail_put_bio; } @@ -659,7 +685,7 @@ static bool btrfs_submit_chunk(struct bio *bio, int mirror_num) * Csum items for reloc roots have already been cloned at this * point, so they are handled as part of the no-checksum case. */ - if (!(inode->flags & BTRFS_INODE_NODATASUM) && + if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) && !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) && !btrfs_is_data_reloc_root(inode->root)) { if (should_async_write(bbio) && @@ -686,9 +712,12 @@ fail: return true; } -void btrfs_submit_bio(struct bio *bio, int mirror_num) +void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num) { - while (!btrfs_submit_chunk(bio, mirror_num)) + /* If bbio->inode is not populated, its file_offset must be 0. */ + ASSERT(bbio->inode || bbio->file_offset == 0); + + while (!btrfs_submit_chunk(bbio, mirror_num)) ; } @@ -706,12 +735,9 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, u64 length, u64 logical, struct page *page, unsigned int pg_offset, int mirror_num) { - struct btrfs_device *dev; + struct btrfs_io_stripe smap = { 0 }; struct bio_vec bvec; struct bio bio; - u64 map_length = 0; - u64 sector; - struct btrfs_io_context *bioc = NULL; int ret = 0; ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); @@ -720,68 +746,38 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, if (btrfs_repair_one_zone(fs_info, logical)) return 0; - map_length = length; - /* * Avoid races with device replace and make sure our bioc has devices * associated to its stripes that don't go away while we are doing the * read repair operation. */ btrfs_bio_counter_inc_blocked(fs_info); - if (btrfs_is_parity_mirror(fs_info, logical, length)) { - /* - * Note that we don't use BTRFS_MAP_WRITE because it's supposed - * to update all raid stripes, but here we just want to correct - * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad - * stripe's dev and sector. - */ - ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, - &map_length, &bioc, 0); - if (ret) - goto out_counter_dec; - ASSERT(bioc->mirror_num == 1); - } else { - ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, - &map_length, &bioc, mirror_num); - if (ret) - goto out_counter_dec; - /* - * This happens when dev-replace is also running, and the - * mirror_num indicates the dev-replace target. - * - * In this case, we don't need to do anything, as the read - * error just means the replace progress hasn't reached our - * read range, and later replace routine would handle it well. - */ - if (mirror_num != bioc->mirror_num) - goto out_counter_dec; - } - - sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; - dev = bioc->stripes[bioc->mirror_num - 1].dev; - btrfs_put_bioc(bioc); + ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num); + if (ret < 0) + goto out_counter_dec; - if (!dev || !dev->bdev || - !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { + if (!smap.dev->bdev || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) { ret = -EIO; goto out_counter_dec; } - bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC); - bio.bi_iter.bi_sector = sector; + bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC); + bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT; __bio_add_page(&bio, page, length, pg_offset); btrfsic_check_bio(&bio); ret = submit_bio_wait(&bio); if (ret) { /* try to remap that extent elsewhere? */ - btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); + btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS); goto out_bio_uninit; } btrfs_info_rl_in_rcu(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", - ino, start, btrfs_dev_name(dev), sector); + ino, start, btrfs_dev_name(smap.dev), + smap.physical >> SECTOR_SHIFT); ret = 0; out_bio_uninit: @@ -791,6 +787,45 @@ out_counter_dec: return ret; } +/* + * Submit a btrfs_bio based repair write. + * + * If @dev_replace is true, the write would be submitted to dev-replace target. + */ +void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace) +{ + struct btrfs_fs_info *fs_info = bbio->fs_info; + u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; + u64 length = bbio->bio.bi_iter.bi_size; + struct btrfs_io_stripe smap = { 0 }; + int ret; + + ASSERT(fs_info); + ASSERT(mirror_num > 0); + ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE); + ASSERT(!bbio->inode); + + btrfs_bio_counter_inc_blocked(fs_info); + ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num); + if (ret < 0) + goto fail; + + if (dev_replace) { + if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) { + bbio->bio.bi_opf &= ~REQ_OP_WRITE; + bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND; + } + ASSERT(smap.dev == fs_info->dev_replace.srcdev); + smap.dev = fs_info->dev_replace.tgtdev; + } + __btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num); + return; + +fail: + btrfs_bio_counter_dec(fs_info); + btrfs_bio_end_io(bbio, errno_to_blk_status(ret)); +} + int __init btrfs_bioset_init(void) { if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, |