From bf9486d6dd2351f6cfff9a8df87657a1248a918d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:16 -0700 Subject: fs/btrfs: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Acked-by: David Sterba Cc: Josef Bacik Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-51-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/btrfs/compression.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f4564f32f6d9..a82b9f17f476 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -455,7 +455,7 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr, - unsigned int opf, bio_end_io_t endio_func, + blk_opf_t opf, bio_end_io_t endio_func, u64 *next_stripe_start) { struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); @@ -505,7 +505,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, unsigned int compressed_len, struct page **compressed_pages, unsigned int nr_pages, - unsigned int write_flags, + blk_opf_t write_flags, struct cgroup_subsys_state *blkcg_css, bool writeback) { @@ -517,7 +517,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; const bool use_append = btrfs_use_zone_append(inode, disk_start); - const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; + const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && IS_ALIGNED(len, fs_info->sectorsize)); -- cgit From ae643a74ebdb150b004601d0d5fe5a1faba9b04d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 22 May 2022 13:47:48 +0200 Subject: btrfs: introduce a data checksum checking helper Although we have several data csum verification code, we never have a function really just to verify checksum for one sector. Function check_data_csum() do extra work for error reporting, thus it requires a lot of extra things like file offset, bio_offset etc. Function btrfs_verify_data_csum() is even worse, it will utilize page checked flag, which means it can not be utilized for direct IO pages. Here we introduce a new helper, btrfs_check_sector_csum(), which really only accept a sector in page, and expected checksum pointer. We use this function to implement check_data_csum(), and export it for incoming patch. Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo [hch: keep passing the csum array as an arguments, as the callers want to print it, rename per request] Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 13 ++++--------- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f4564f32f6d9..6ab82e142f1f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -147,12 +147,10 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, u64 disk_start) { struct btrfs_fs_info *fs_info = inode->root->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); const u32 csum_size = fs_info->csum_size; const u32 sectorsize = fs_info->sectorsize; struct page *page; unsigned int i; - char *kaddr; u8 csum[BTRFS_CSUM_SIZE]; struct compressed_bio *cb = bio->bi_private; u8 *cb_sum = cb->sums; @@ -161,8 +159,6 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) return 0; - shash->tfm = fs_info->csum_shash; - for (i = 0; i < cb->nr_pages; i++) { u32 pg_offset; u32 bytes_left = PAGE_SIZE; @@ -175,12 +171,11 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, /* Hash through the page sector by sector */ for (pg_offset = 0; pg_offset < bytes_left; pg_offset += sectorsize) { - kaddr = kmap_atomic(page); - crypto_shash_digest(shash, kaddr + pg_offset, - sectorsize, csum); - kunmap_atomic(kaddr); + int ret; - if (memcmp(&csum, cb_sum, csum_size) != 0) { + ret = btrfs_check_sector_csum(fs_info, page, pg_offset, + csum, cb_sum); + if (ret) { btrfs_print_data_csum_error(inode, disk_start, csum, cb_sum, cb->mirror_num); if (btrfs_bio(bio)->device) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad31cc5cdd50..6e65778040ed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3253,6 +3253,8 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path); /* inode.c */ void btrfs_submit_data_bio(struct inode *inode, struct bio *bio, int mirror_num, enum btrfs_compression_type compress_type); +int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, + u32 pgoff, u8 *csum, const u8 * const csum_expected); unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u64 start, u64 end); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 332b8f1bf609..193931b3c20a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3328,6 +3328,29 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, finish_ordered_fn, uptodate); } +/* + * Verify the checksum for a single sector without any extra action that depend + * on the type of I/O. + */ +int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, + u32 pgoff, u8 *csum, const u8 * const csum_expected) +{ + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + char *kaddr; + + ASSERT(pgoff + fs_info->sectorsize <= PAGE_SIZE); + + shash->tfm = fs_info->csum_shash; + + kaddr = kmap_local_page(page) + pgoff; + crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum); + kunmap_local(kaddr); + + if (memcmp(csum, csum_expected, fs_info->csum_size)) + return -EIO; + return 0; +} + /* * check_data_csum - verify checksum of one sector of uncompressed data * @inode: inode @@ -3338,14 +3361,15 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, * @start: logical offset in the file * * The length of such check is always one sector size. + * + * When csum mismatch is detected, we will also report the error and fill the + * corrupted range with zero. (Thus it needs the extra parameters) */ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u32 pgoff, u64 start) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - char *kaddr; u32 len = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; unsigned int offset_sectors; @@ -3357,16 +3381,10 @@ static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, offset_sectors = bio_offset >> fs_info->sectorsize_bits; csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size; - kaddr = kmap_atomic(page); - shash->tfm = fs_info->csum_shash; - - crypto_shash_digest(shash, kaddr + pgoff, len, csum); - kunmap_atomic(kaddr); - - if (memcmp(csum, csum_expected, csum_size)) + if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected)) goto zeroit; - return 0; + zeroit: btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, bbio->mirror_num); -- cgit From 21a8935ead31c09a8ecb06e3b7a5289a630645ac Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 1 Jun 2022 13:47:54 +0200 Subject: btrfs: remove redundant calls to flush_dcache_page Both memzero_page and memcpy_to_page already call flush_dcache_page so we can remove the calls from btrfs code. Reviewed-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 -- fs/btrfs/extent_io.c | 7 +------ fs/btrfs/inode.c | 6 ++---- fs/btrfs/reflink.c | 5 +---- 4 files changed, 4 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6ab82e142f1f..2536754656b6 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -760,7 +760,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, int zeros; zeros = PAGE_SIZE - zero_offset; memzero_page(page, zero_offset, zeros); - flush_dcache_page(page); } } @@ -1476,7 +1475,6 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len, ASSERT(copy_start - decompressed < buf_len); memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + copy_start - decompressed, copy_len); - flush_dcache_page(bvec.bv_page); cur_offset += copy_len; bio_advance(orig_bio, copy_len); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 62425d1494a9..69b6b4ba009e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3641,7 +3641,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, if (zero_offset) { iosize = PAGE_SIZE - zero_offset; memzero_page(page, zero_offset, iosize); - flush_dcache_page(page); } } begin_page_read(fs_info, page); @@ -3656,7 +3655,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, iosize = PAGE_SIZE - pg_offset; memzero_page(page, pg_offset, iosize); - flush_dcache_page(page); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, @@ -3740,7 +3738,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, struct extent_state *cached = NULL; memzero_page(page, pg_offset, iosize); - flush_dcache_page(page); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); @@ -4158,10 +4155,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, return 0; } - if (page->index == end_index) { + if (page->index == end_index) memzero_page(page, pg_offset, PAGE_SIZE - pg_offset); - flush_dcache_page(page); - } ret = set_page_extent_mapped(page); if (ret < 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 76e493e2d9b2..f96e332bfe96 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4873,7 +4873,6 @@ again: else memzero_page(page, (block_start - page_offset(page)) + offset, len); - flush_dcache_page(page); } btrfs_page_clear_checked(fs_info, page, block_start, block_end + 1 - block_start); @@ -8598,10 +8597,9 @@ again: else zero_start = PAGE_SIZE; - if (zero_start != PAGE_SIZE) { + if (zero_start != PAGE_SIZE) memzero_page(page, zero_start, PAGE_SIZE - zero_start); - flush_dcache_page(page); - } + btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start); btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start); diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index a3549d587464..e30f53bd4e55 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -110,7 +110,6 @@ static int copy_inline_to_page(struct btrfs_inode *inode, if (comp_type == BTRFS_COMPRESS_NONE) { memcpy_to_page(page, offset_in_page(file_offset), data_start, datal); - flush_dcache_page(page); } else { ret = btrfs_decompress(comp_type, data_start, page, offset_in_page(file_offset), @@ -132,10 +131,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode, * * So what's in the range [500, 4095] corresponds to zeroes. */ - if (datal < block_size) { + if (datal < block_size) memzero_page(page, datal, block_size - datal); - flush_dcache_page(page); - } btrfs_page_set_uptodate(fs_info, page, file_offset, block_size); btrfs_page_clear_checked(fs_info, page, file_offset, block_size); -- cgit From fed8a72df126fdf03bf6bd46d83be9ff3bd90892 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 May 2022 09:36:38 +0200 Subject: btrfs: don't use btrfs_bio_wq_end_io for compressed writes Compressed write bio completion is the only user of btrfs_bio_wq_end_io for writes, and the use of btrfs_bio_wq_end_io is a little suboptimal here as we only real need user context for the final completion of a compressed_bio structure, and not every single bio completion. Add a work_struct to struct compressed_bio instead and use that to call finish_compressed_bio_write. This allows to remove all handling of write bios in the btrfs_bio_wq_end_io infrastructure. Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 44 ++++++++++++++++++++++---------------------- fs/btrfs/compression.h | 7 +++++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 30 ++++++++++++------------------ fs/btrfs/super.c | 2 -- 5 files changed, 40 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2536754656b6..2ea5cf5ae210 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -398,6 +398,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb) kfree(cb); } +static void btrfs_finish_compressed_write_work(struct work_struct *work) +{ + struct compressed_bio *cb = + container_of(work, struct compressed_bio, write_end_work); + + finish_compressed_bio_write(cb); +} + /* * Do the cleanup once all the compressed pages hit the disk. This will clear * writeback on the file pages and free the compressed pages. @@ -409,29 +417,15 @@ static void end_compressed_bio_write(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; - if (!dec_and_test_compressed_bio(cb, bio)) - goto out; - - btrfs_record_physical_zoned(cb->inode, cb->start, bio); + if (dec_and_test_compressed_bio(cb, bio)) { + struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); - finish_compressed_bio_write(cb); -out: + btrfs_record_physical_zoned(cb->inode, cb->start, bio); + queue_work(fs_info->compressed_write_workers, &cb->write_end_work); + } bio_put(bio); } -static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, - struct bio *bio, int mirror_num) -{ - blk_status_t ret; - - ASSERT(bio->bi_iter.bi_size); - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - if (ret) - return ret; - ret = btrfs_map_bio(fs_info, bio, mirror_num); - return ret; -} - /* * Allocate a compressed_bio, which will be used to read/write on-disk * (aka, compressed) * data. @@ -528,7 +522,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->compressed_pages = compressed_pages; cb->compressed_len = compressed_len; cb->writeback = writeback; - cb->orig_bio = NULL; + INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work); cb->nr_pages = nr_pages; if (blkcg_css) @@ -598,7 +592,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, goto finish_cb; } - ret = submit_compressed_bio(fs_info, bio, 0); + ASSERT(bio->bi_iter.bi_size); + ret = btrfs_map_bio(fs_info, bio, 0); if (ret) goto finish_cb; bio = NULL; @@ -935,7 +930,12 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, fs_info->sectorsize); sums += fs_info->csum_size * nr_sectors; - ret = submit_compressed_bio(fs_info, comp_bio, mirror_num); + ASSERT(comp_bio->bi_iter.bi_size); + ret = btrfs_bio_wq_end_io(fs_info, comp_bio, + BTRFS_WQ_ENDIO_DATA); + if (ret) + goto finish_cb; + ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); if (ret) goto finish_cb; comp_bio = NULL; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 2707404389a5..5fca7603e928 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -61,8 +61,11 @@ struct compressed_bio { blk_status_t status; int mirror_num; - /* for reads, this is the bio we are copying the data into */ - struct bio *orig_bio; + union { + /* For reads, this is the bio we are copying the data into */ + struct bio *orig_bio; + struct work_struct write_end_work; + }; /* * the start of a variable length array of checksums only diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1d5b38f3aa5f..e689dba076b0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -854,7 +854,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *endio_meta_workers; struct workqueue_struct *endio_raid56_workers; struct workqueue_struct *rmw_workers; - struct btrfs_workqueue *endio_meta_write_workers; + struct workqueue_struct *compressed_write_workers; struct btrfs_workqueue *endio_write_workers; struct btrfs_workqueue *endio_freespace_worker; struct btrfs_workqueue *caching_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c9c6c2980dd..ea32627139b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -749,19 +749,10 @@ static void end_workqueue_bio(struct bio *bio) fs_info = end_io_wq->info; end_io_wq->status = bio->bi_status; - if (btrfs_op(bio) == BTRFS_MAP_WRITE) { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) - wq = fs_info->endio_meta_write_workers; - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) - wq = fs_info->endio_freespace_worker; - else - wq = fs_info->endio_write_workers; - } else { - if (end_io_wq->metadata) - wq = fs_info->endio_meta_workers; - else - wq = fs_info->endio_workers; - } + if (end_io_wq->metadata) + wq = fs_info->endio_meta_workers; + else + wq = fs_info->endio_workers; btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); btrfs_queue_work(wq, &end_io_wq->work); @@ -772,6 +763,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, { struct btrfs_end_io_wq *end_io_wq; + if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE)) + return BLK_STS_IOERR; + end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); if (!end_io_wq) return BLK_STS_RESOURCE; @@ -2281,6 +2275,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) destroy_workqueue(fs_info->endio_raid56_workers); if (fs_info->rmw_workers) destroy_workqueue(fs_info->rmw_workers); + if (fs_info->compressed_write_workers) + destroy_workqueue(fs_info->compressed_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->delayed_workers); @@ -2295,7 +2291,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) * queues can do metadata I/O operations. */ btrfs_destroy_workqueue(fs_info->endio_meta_workers); - btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); } static void free_root_extent_buffers(struct btrfs_root *root) @@ -2483,15 +2478,14 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->endio_meta_workers = btrfs_alloc_workqueue(fs_info, "endio-meta", flags, max_active, 4); - fs_info->endio_meta_write_workers = - btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags, - max_active, 2); fs_info->endio_raid56_workers = alloc_workqueue("btrfs-endio-raid56", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); fs_info->endio_write_workers = btrfs_alloc_workqueue(fs_info, "endio-write", flags, max_active, 2); + fs_info->compressed_write_workers = + alloc_workqueue("btrfs-compressed-write", flags, max_active); fs_info->endio_freespace_worker = btrfs_alloc_workqueue(fs_info, "freespace-write", flags, max_active, 0); @@ -2506,7 +2500,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) if (!(fs_info->workers && fs_info->hipri_workers && fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && - fs_info->endio_meta_write_workers && + fs_info->compressed_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->fixup_workers && diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8539ee2dc79f..e3800f0f993f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1934,8 +1934,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, - new_pool_size); btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); -- cgit From d7b9416fe5c581c69e446b971c4a0394c609fd89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 May 2022 09:36:40 +0200 Subject: btrfs: remove btrfs_end_io_wq All reads bio that go through btrfs_map_bio need to be completed in user context. And read I/Os are the most common and timing critical in almost any file system workloads. Embed a work_struct into struct btrfs_bio and use it to complete all read bios submitted through btrfs_map, using the REQ_META flag to decide which workqueue they are placed on. This removes the need for a separate 128 byte allocation (typically rounded up to 192 bytes by slab) for all reads with a size increase of 24 bytes for struct btrfs_bio. Future patches will reorganize struct btrfs_bio to make use of this extra space for writes as well. (All sizes are based a on typical 64-bit non-debug build) Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 4 -- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 120 +++---------------------------------------------- fs/btrfs/disk-io.h | 10 ----- fs/btrfs/inode.c | 24 +--------- fs/btrfs/super.c | 11 +---- fs/btrfs/volumes.c | 33 +++++++++++--- fs/btrfs/volumes.h | 3 ++ 8 files changed, 42 insertions(+), 167 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2ea5cf5ae210..63d542961b78 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -931,10 +931,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, sums += fs_info->csum_size * nr_sectors; ASSERT(comp_bio->bi_iter.bi_size); - ret = btrfs_bio_wq_end_io(fs_info, comp_bio, - BTRFS_WQ_ENDIO_DATA); - if (ret) - goto finish_cb; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); if (ret) goto finish_cb; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e689dba076b0..22a287cbc3e7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -850,8 +850,8 @@ struct btrfs_fs_info { struct btrfs_workqueue *hipri_workers; struct btrfs_workqueue *delalloc_workers; struct btrfs_workqueue *flush_workers; - struct btrfs_workqueue *endio_workers; - struct btrfs_workqueue *endio_meta_workers; + struct workqueue_struct *endio_workers; + struct workqueue_struct *endio_meta_workers; struct workqueue_struct *endio_raid56_workers; struct workqueue_struct *rmw_workers; struct workqueue_struct *compressed_write_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a085cd95ef12..ed1d92b370db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -51,7 +51,6 @@ BTRFS_SUPER_FLAG_METADUMP |\ BTRFS_SUPER_FLAG_METADUMP_V2) -static void end_workqueue_fn(struct btrfs_work *work); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info); @@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); -/* - * btrfs_end_io_wq structs are used to do processing in task context when an IO - * is complete. This is used during reads to verify checksums, and it is used - * by writes to insert metadata for new file extents after IO is complete. - */ -struct btrfs_end_io_wq { - struct bio *bio; - bio_end_io_t *end_io; - void *private; - struct btrfs_fs_info *info; - blk_status_t status; - enum btrfs_wq_endio_type metadata; - struct btrfs_work work; -}; - -static struct kmem_cache *btrfs_end_io_wq_cache; - -int __init btrfs_end_io_wq_init(void) -{ - btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", - sizeof(struct btrfs_end_io_wq), - 0, - SLAB_MEM_SPREAD, - NULL); - if (!btrfs_end_io_wq_cache) - return -ENOMEM; - return 0; -} - -void __cold btrfs_end_io_wq_exit(void) -{ - kmem_cache_destroy(btrfs_end_io_wq_cache); -} - static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) { if (fs_info->csum_shash) @@ -740,48 +705,6 @@ err: return ret; } -static void end_workqueue_bio(struct bio *bio) -{ - struct btrfs_end_io_wq *end_io_wq = bio->bi_private; - struct btrfs_fs_info *fs_info; - struct btrfs_workqueue *wq; - - fs_info = end_io_wq->info; - end_io_wq->status = bio->bi_status; - - if (end_io_wq->metadata) - wq = fs_info->endio_meta_workers; - else - wq = fs_info->endio_workers; - - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); - btrfs_queue_work(wq, &end_io_wq->work); -} - -blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, - enum btrfs_wq_endio_type metadata) -{ - struct btrfs_end_io_wq *end_io_wq; - - if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE)) - return BLK_STS_IOERR; - - end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); - if (!end_io_wq) - return BLK_STS_RESOURCE; - - end_io_wq->private = bio->bi_private; - end_io_wq->end_io = bio->bi_end_io; - end_io_wq->info = info; - end_io_wq->status = 0; - end_io_wq->bio = bio; - end_io_wq->metadata = metadata; - - bio->bi_private = end_io_wq; - bio->bi_end_io = end_workqueue_bio; - return 0; -} - static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; @@ -917,14 +840,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_ bio->bi_opf |= REQ_META; if (btrfs_op(bio) != BTRFS_MAP_WRITE) { - /* - * called for a read, do the setup so that checksum validation - * can happen in the async kernel threads - */ - ret = btrfs_bio_wq_end_io(fs_info, bio, - BTRFS_WQ_ENDIO_METADATA); - if (!ret) - ret = btrfs_map_bio(fs_info, bio, mirror_num); + ret = btrfs_map_bio(fs_info, bio, mirror_num); } else if (!should_async_write(fs_info, BTRFS_I(inode))) { ret = btree_csum_one_bio(bio); if (!ret) @@ -1947,25 +1863,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, return root; } -/* - * called by the kthread helper functions to finally call the bio end_io - * functions. This is where read checksum verification actually happens - */ -static void end_workqueue_fn(struct btrfs_work *work) -{ - struct bio *bio; - struct btrfs_end_io_wq *end_io_wq; - - end_io_wq = container_of(work, struct btrfs_end_io_wq, work); - bio = end_io_wq->bio; - - bio->bi_status = end_io_wq->status; - bio->bi_private = end_io_wq->private; - bio->bi_end_io = end_io_wq->end_io; - bio_endio(bio); - kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); -} - static int cleaner_kthread(void *arg) { struct btrfs_fs_info *fs_info = arg; @@ -2272,7 +2169,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->hipri_workers); btrfs_destroy_workqueue(fs_info->workers); - btrfs_destroy_workqueue(fs_info->endio_workers); + if (fs_info->endio_workers) + destroy_workqueue(fs_info->endio_workers); if (fs_info->endio_raid56_workers) destroy_workqueue(fs_info->endio_raid56_workers); if (fs_info->rmw_workers) @@ -2292,7 +2190,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) * the queues used for metadata I/O, since tasks from those other work * queues can do metadata I/O operations. */ - btrfs_destroy_workqueue(fs_info->endio_meta_workers); + if (fs_info->endio_meta_workers) + destroy_workqueue(fs_info->endio_meta_workers); } static void free_root_extent_buffers(struct btrfs_root *root) @@ -2471,15 +2370,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->fixup_workers = btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0); - /* - * endios are largely parallel and should have a very - * low idle thresh - */ fs_info->endio_workers = - btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4); + alloc_workqueue("btrfs-endio", flags, max_active); fs_info->endio_meta_workers = - btrfs_alloc_workqueue(fs_info, "endio-meta", flags, - max_active, 4); + alloc_workqueue("btrfs-endio-meta", flags, max_active); fs_info->endio_raid56_workers = alloc_workqueue("btrfs-endio-raid56", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 809ef065f166..05e779a41a99 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -17,12 +17,6 @@ */ #define BTRFS_BDEV_BLOCKSIZE (4096) -enum btrfs_wq_endio_type { - BTRFS_WQ_ENDIO_DATA, - BTRFS_WQ_ENDIO_METADATA, - BTRFS_WQ_ENDIO_FREE_SPACE, -}; - static inline u64 btrfs_sb_offset(int mirror) { u64 start = SZ_16K; @@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid, int level, struct btrfs_key *first_key); -blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, - enum btrfs_wq_endio_type metadata); blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, u64 dio_file_offset, extent_submit_bio_start_t *submit_bio_start); @@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data, int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid); int btrfs_init_root_free_objectid(struct btrfs_root *root); -int __init btrfs_end_io_wq_init(void); -void __cold btrfs_end_io_wq_exit(void); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_set_buffer_lockdep_class(u64 objectid, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3067c966d8b6..9cce0a3228f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2640,12 +2640,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, return; } - ret = btrfs_bio_wq_end_io(fs_info, bio, - btrfs_is_free_space_inode(BTRFS_I(inode)) ? - BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA); - if (ret) - goto out; - /* * Lookup bio sums does extra checks around whether we need to csum or * not, which is why we ignore skip_sum here. @@ -7879,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio, BUG_ON(bio_op(bio) == REQ_OP_WRITE); - if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA)) - return; - refcount_inc(&dip->refs); if (btrfs_map_bio(fs_info, bio, mirror_num)) refcount_dec(&dip->refs); @@ -7970,19 +7961,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; - bool write = btrfs_op(bio) == BTRFS_MAP_WRITE; blk_status_t ret; - if (!write) { - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - if (ret) - return ret; - } - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) goto map; - if (write) { + if (btrfs_op(bio) == BTRFS_MAP_WRITE) { /* Check btrfs_submit_data_write_bio() for async submit rules */ if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers)) return btrfs_wq_submit_bio(inode, bio, 0, file_offset, @@ -10314,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, return ret; } - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - if (ret) { - btrfs_bio_free_csum(bbio); - return ret; - } - atomic_inc(&priv->pending); ret = btrfs_map_bio(fs_info, bio, mirror_num); if (ret) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e3800f0f993f..719dda57dc7a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1932,8 +1932,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); @@ -2702,13 +2700,9 @@ static int __init init_btrfs_fs(void) if (err) goto free_delayed_ref; - err = btrfs_end_io_wq_init(); - if (err) - goto free_prelim_ref; - err = btrfs_interface_init(); if (err) - goto free_end_io_wq; + goto free_prelim_ref; btrfs_print_mod_info(); @@ -2724,8 +2718,6 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); -free_end_io_wq: - btrfs_end_io_wq_exit(); free_prelim_ref: btrfs_prelim_ref_exit(); free_delayed_ref: @@ -2763,7 +2755,6 @@ static void __exit exit_btrfs_fs(void) extent_state_cache_exit(); extent_io_exit(); btrfs_interface_exit(); - btrfs_end_io_wq_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9935b5d955be..04e7e79cab47 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6616,11 +6616,27 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1); } -static inline void btrfs_end_bioc(struct btrfs_io_context *bioc) +static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc) +{ + if (bioc->orig_bio->bi_opf & REQ_META) + return bioc->fs_info->endio_meta_workers; + return bioc->fs_info->endio_workers; +} + +static void btrfs_end_bio_work(struct work_struct *work) +{ + struct btrfs_bio *bbio = + container_of(work, struct btrfs_bio, end_io_work); + + bio_endio(&bbio->bio); +} + +static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async) { struct bio *orig_bio = bioc->orig_bio; + struct btrfs_bio *bbio = btrfs_bio(orig_bio); - btrfs_bio(orig_bio)->mirror_num = bioc->mirror_num; + bbio->mirror_num = bioc->mirror_num; orig_bio->bi_private = bioc->private; orig_bio->bi_end_io = bioc->end_io; @@ -6632,7 +6648,14 @@ static inline void btrfs_end_bioc(struct btrfs_io_context *bioc) orig_bio->bi_status = BLK_STS_IOERR; else orig_bio->bi_status = BLK_STS_OK; - bio_endio(orig_bio); + + if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) { + INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work); + queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work); + } else { + bio_endio(orig_bio); + } + btrfs_put_bioc(bioc); } @@ -6664,7 +6687,7 @@ static void btrfs_end_bio(struct bio *bio) btrfs_bio_counter_dec(bioc->fs_info); if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc); + btrfs_end_bioc(bioc, true); } static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio, @@ -6762,7 +6785,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { atomic_inc(&bioc->error); if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc); + btrfs_end_bioc(bioc, false); continue; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1e86c48268ed..7973d11e5f5d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -371,6 +371,9 @@ struct btrfs_bio { u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; struct bvec_iter iter; + /* For read end I/O handling */ + struct work_struct end_io_work; + /* * This member must come last, bio_alloc_bioset will allocate enough * bytes for entire btrfs_bio but relies on bio being last. -- cgit From 1a722d8f5be22b0d8c9db365abb67cb3c2e4215c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jun 2022 12:04:07 +0200 Subject: btrfs: do not return errors from btrfs_map_bio Always consume the bio and call the end_io handler on error instead of returning an error and letting the caller handle it. This matches what the block layer submission does and avoids any confusion on who needs to handle errors. As this requires touching all the callers, rename the function to btrfs_submit_bio, which describes the functionality much better. Reviewed-by: Nikolay Borisov Tested-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 8 ++------ fs/btrfs/disk-io.c | 21 ++++++++++----------- fs/btrfs/inode.c | 25 ++++++++++--------------- fs/btrfs/volumes.c | 12 +++++++----- fs/btrfs/volumes.h | 3 +-- 5 files changed, 30 insertions(+), 39 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 63d542961b78..907fc8a4c092 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -593,9 +593,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, } ASSERT(bio->bi_iter.bi_size); - ret = btrfs_map_bio(fs_info, bio, 0); - if (ret) - goto finish_cb; + btrfs_submit_bio(fs_info, bio, 0); bio = NULL; } cond_resched(); @@ -931,9 +929,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, sums += fs_info->csum_size * nr_sectors; ASSERT(comp_bio->bi_iter.bi_size); - ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); - if (ret) - goto finish_cb; + btrfs_submit_bio(fs_info, comp_bio, mirror_num); comp_bio = NULL; } } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ae7fc4ed2524..5719712f2d4c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -731,7 +731,6 @@ static void run_one_async_done(struct btrfs_work *work) { struct async_submit_bio *async; struct inode *inode; - blk_status_t ret; async = container_of(work, struct async_submit_bio, work); inode = async->inode; @@ -749,11 +748,7 @@ static void run_one_async_done(struct btrfs_work *work) * This changes nothing when cgroups aren't in use. */ async->bio->bi_opf |= REQ_CGROUP_PUNT; - ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num); - if (ret) { - async->bio->bi_status = ret; - bio_endio(async->bio); - } + btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num); } static void run_one_async_free(struct btrfs_work *work) @@ -817,7 +812,7 @@ static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio, { /* * when we're called for a write, we're already in the async - * submission context. Just jump into btrfs_map_bio + * submission context. Just jump into btrfs_submit_bio. */ return btree_csum_one_bio(bio); } @@ -842,11 +837,15 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_ bio->bi_opf |= REQ_META; if (btrfs_op(bio) != BTRFS_MAP_WRITE) { - ret = btrfs_map_bio(fs_info, bio, mirror_num); - } else if (!should_async_write(fs_info, BTRFS_I(inode))) { + btrfs_submit_bio(fs_info, bio, mirror_num); + return; + } + if (!should_async_write(fs_info, BTRFS_I(inode))) { ret = btree_csum_one_bio(bio); - if (!ret) - ret = btrfs_map_bio(fs_info, bio, mirror_num); + if (!ret) { + btrfs_submit_bio(fs_info, bio, mirror_num); + return; + } } else { /* * kthread helpers are used to submit writes so that diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e4632c83453e..fe7e8af21c2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2687,7 +2687,8 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro goto out; } } - ret = btrfs_map_bio(fs_info, bio, mirror_num); + btrfs_submit_bio(fs_info, bio, mirror_num); + return; out: if (ret) { bio->bi_status = ret; @@ -2715,14 +2716,13 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, * not, which is why we ignore skip_sum here. */ ret = btrfs_lookup_bio_sums(inode, bio, NULL); - if (ret) - goto out; - ret = btrfs_map_bio(fs_info, bio, mirror_num); -out: if (ret) { bio->bi_status = ret; bio_endio(bio); + return; } + + btrfs_submit_bio(fs_info, bio, mirror_num); } /* @@ -7945,8 +7945,7 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio, BUG_ON(bio_op(bio) == REQ_OP_WRITE); refcount_inc(&dip->refs); - if (btrfs_map_bio(fs_info, bio, mirror_num)) - refcount_dec(&dip->refs); + btrfs_submit_bio(fs_info, bio, mirror_num); } static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, @@ -8046,7 +8045,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, file_offset - dip->file_offset); } map: - return btrfs_map_bio(fs_info, bio, 0); + btrfs_submit_bio(fs_info, bio, 0); + return BLK_STS_OK; } static void btrfs_submit_direct(const struct iomap_iter *iter, @@ -10330,7 +10330,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) { struct btrfs_encoded_read_private *priv = bio->bi_private; - struct btrfs_bio *bbio = btrfs_bio(bio); struct btrfs_fs_info *fs_info = inode->root->fs_info; blk_status_t ret; @@ -10341,12 +10340,8 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode, } atomic_inc(&priv->pending); - ret = btrfs_map_bio(fs_info, bio, mirror_num); - if (ret) { - atomic_dec(&priv->pending); - btrfs_bio_free_csum(bbio); - } - return ret; + btrfs_submit_bio(fs_info, bio, mirror_num); + return BLK_STS_OK; } static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2eb72dda764c..6b2ad30e0221 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6726,8 +6726,8 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, } } btrfs_debug_in_rcu(fs_info, - "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", - bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, + "%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", + __func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid, bio->bi_iter.bi_size); @@ -6737,8 +6737,7 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, submit_bio(bio); } -blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num) +void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num) { u64 logical = bio->bi_iter.bi_sector << 9; u64 length = bio->bi_iter.bi_size; @@ -6783,7 +6782,10 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } out_dec: btrfs_bio_counter_dec(fs_info); - return errno_to_blk_status(ret); + if (ret) { + bio->bi_status = errno_to_blk_status(ret); + bio_endio(bio); + } } static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 9537d82bb7a2..5639961b3626 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -580,8 +580,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); -blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num); +void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder); struct btrfs_device *btrfs_scan_one_device(const char *path, -- cgit From 524bcd1e178da1dccf24d9fc60fb20a35ec45e88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jul 2022 07:33:27 +0200 Subject: btrfs: simplify the pending I/O counting in struct compressed_bio Instead of counting the sectors just count the bios, with an extra reference held during submission. This significantly simplifies the submission side error handling. This slightly changes completion and error handling of btrfs_submit_compressed_{read,write} because with the old code the compressed_bio could have been completed in submit_compressed_{read,write} only if there was an error during submission for one of the lower bio, whilst with the new code there is a chance for this to happen even for successful submission if the all the lower bios complete before the end of the function is reached. Reviewed-by: Nikolay Borisov Reviewed-by: Boris Burkov Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 125 ++++++++++++------------------------------------- fs/btrfs/compression.h | 4 +- 2 files changed, 32 insertions(+), 97 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 907fc8a4c092..37676949a2b0 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -191,44 +191,6 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, return 0; } -/* - * Reduce bio and io accounting for a compressed_bio with its corresponding bio. - * - * Return true if there is no pending bio nor io. - * Return false otherwise. - */ -static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); - unsigned int bi_size = 0; - bool last_io = false; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - /* - * At endio time, bi_iter.bi_size doesn't represent the real bio size. - * Thus here we have to iterate through all segments to grab correct - * bio size. - */ - bio_for_each_segment_all(bvec, bio, iter_all) - bi_size += bvec->bv_len; - - if (bio->bi_status) - cb->status = bio->bi_status; - - ASSERT(bi_size && bi_size <= cb->compressed_len); - last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits, - &cb->pending_sectors); - /* - * Here we must wake up the possible error handler after all other - * operations on @cb finished, or we can race with - * finish_compressed_bio_*() which may free @cb. - */ - wake_up_var(cb); - - return last_io; -} - static void finish_compressed_bio_read(struct compressed_bio *cb) { unsigned int index; @@ -288,7 +250,10 @@ static void end_compressed_bio_read(struct bio *bio) unsigned int mirror = btrfs_bio(bio)->mirror_num; int ret = 0; - if (!dec_and_test_compressed_bio(cb, bio)) + if (bio->bi_status) + cb->status = bio->bi_status; + + if (!refcount_dec_and_test(&cb->pending_ios)) goto out; /* @@ -417,7 +382,10 @@ static void end_compressed_bio_write(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; - if (dec_and_test_compressed_bio(cb, bio)) { + if (bio->bi_status) + cb->status = bio->bi_status; + + if (refcount_dec_and_test(&cb->pending_ios)) { struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); btrfs_record_physical_zoned(cb->inode, cb->start, bio); @@ -476,7 +444,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte return ERR_PTR(ret); } *next_stripe_start = disk_bytenr + geom.len; - + refcount_inc(&cb->pending_ios); return bio; } @@ -503,7 +471,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, struct compressed_bio *cb; u64 cur_disk_bytenr = disk_start; u64 next_stripe_start; - blk_status_t ret; + blk_status_t ret = BLK_STS_OK; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; const bool use_append = btrfs_use_zone_append(inode, disk_start); const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; @@ -513,7 +481,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) return BLK_STS_RESOURCE; - refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); + refcount_set(&cb->pending_ios, 1); cb->status = BLK_STS_OK; cb->inode = &inode->vfs_inode; cb->start = start; @@ -543,8 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, &next_stripe_start); if (IS_ERR(bio)) { ret = errno_to_blk_status(PTR_ERR(bio)); - bio = NULL; - goto finish_cb; + break; } if (blkcg_css) bio->bi_opf |= REQ_CGROUP_PUNT; @@ -588,8 +555,11 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, if (submit) { if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, true); - if (ret) - goto finish_cb; + if (ret) { + bio->bi_status = ret; + bio_endio(bio); + break; + } } ASSERT(bio->bi_iter.bi_size); @@ -598,33 +568,12 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, } cond_resched(); } - if (blkcg_css) - kthread_associate_blkcg(NULL); - return 0; - -finish_cb: if (blkcg_css) kthread_associate_blkcg(NULL); - if (bio) { - bio->bi_status = ret; - bio_endio(bio); - } - /* Last byte of @cb is submitted, endio will free @cb */ - if (cur_disk_bytenr == disk_start + compressed_len) - return ret; - - wait_var_event(cb, refcount_read(&cb->pending_sectors) == - (disk_start + compressed_len - cur_disk_bytenr) >> - fs_info->sectorsize_bits); - /* - * Even with previous bio ended, we should still have io not yet - * submitted, thus need to finish manually. - */ - ASSERT(refcount_read(&cb->pending_sectors)); - /* Now we are the only one referring @cb, can finish it safely. */ - finish_compressed_bio_write(cb); + if (refcount_dec_and_test(&cb->pending_ios)) + finish_compressed_bio_write(cb); return ret; } @@ -830,7 +779,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, goto out; } - refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); + refcount_set(&cb->pending_ios, 1); cb->status = BLK_STS_OK; cb->inode = inode; cb->mirror_num = mirror_num; @@ -880,9 +829,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, REQ_OP_READ, end_compressed_bio_read, &next_stripe_start); if (IS_ERR(comp_bio)) { - ret = errno_to_blk_status(PTR_ERR(comp_bio)); - comp_bio = NULL; - goto finish_cb; + cb->status = errno_to_blk_status(PTR_ERR(comp_bio)); + break; } } /* @@ -921,8 +869,11 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, unsigned int nr_sectors; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - if (ret) - goto finish_cb; + if (ret) { + comp_bio->bi_status = ret; + bio_endio(comp_bio); + break; + } nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size, fs_info->sectorsize); @@ -933,6 +884,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = NULL; } } + + if (refcount_dec_and_test(&cb->pending_ios)) + finish_compressed_bio_read(cb); return; fail: @@ -950,25 +904,6 @@ out: bio->bi_status = ret; bio_endio(bio); return; -finish_cb: - if (comp_bio) { - comp_bio->bi_status = ret; - bio_endio(comp_bio); - } - /* All bytes of @cb is submitted, endio will free @cb */ - if (cur_disk_byte == disk_bytenr + compressed_len) - return; - - wait_var_event(cb, refcount_read(&cb->pending_sectors) == - (disk_bytenr + compressed_len - cur_disk_byte) >> - fs_info->sectorsize_bits); - /* - * Even with previous bio ended, we should still have io not yet - * submitted, thus need to finish @cb manually. - */ - ASSERT(refcount_read(&cb->pending_sectors)); - /* Now we are the only one referring @cb, can finish it safely. */ - finish_compressed_bio_read(cb); } /* diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 5fca7603e928..0e4cbf04fd86 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -30,8 +30,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0); #define BTRFS_ZLIB_DEFAULT_LEVEL 3 struct compressed_bio { - /* Number of sectors with unfinished IO (unsubmitted or unfinished) */ - refcount_t pending_sectors; + /* Number of outstanding bios */ + refcount_t pending_ios; /* Number of compressed pages in the array */ unsigned int nr_pages; -- cgit From 81bd9328ab9f9bf818923b92a64896fd4cf58f2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jul 2022 07:33:30 +0200 Subject: btrfs: fix repair of compressed extents Currently the checksum of compressed extents is verified based on the compressed data and the lower btrfs_bio, but the actual repair process is driven by end_bio_extent_readpage on the upper btrfs_bio for the decompressed data. This has a bunch of issues, including not being able to properly communicate the failed mirror up in case that the I/O submission got preempted, a general loss of if an error was an I/O error or a checksum verification failure, but most importantly that this design causes btrfs_clean_io_failure to eventually write back the uncompressed good data onto the disk sectors that are supposed to contain compressed data. Fix this by moving the repair to the lower btrfs_bio. To do so, a fair amount of code has to be reshuffled: a) the lower btrfs_bio now needs a valid csum pointer. The easiest way to achieve that is to pass NULL btrfs_lookup_bio_sums and just use the btrfs_bio management of csums. For a compressed_bio that is split into multiple btrfs_bios this means additional memory allocations, but the code becomes a lot more regular. b) checksum verification now runs directly on the lower btrfs_bio instead of the compressed_bio. This actually nicely simplifies the end I/O processing. c) btrfs_repair_one_sector can't just look up the logical address for the file offset any more, as there is no corresponding relative offsets that apply to the file offset and the logic address for compressed extents. Instead require that the saved bvec_iter in the btrfs_bio is filled out for all read bios and use that, which again removes a fair amount of code. Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 171 +++++++++++++++++-------------------------------- fs/btrfs/compression.h | 7 -- fs/btrfs/ctree.h | 2 + fs/btrfs/extent_io.c | 45 +++---------- fs/btrfs/extent_io.h | 1 - fs/btrfs/inode.c | 7 ++ 6 files changed, 76 insertions(+), 157 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 37676949a2b0..8124cd3d0b6b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -136,66 +136,14 @@ static int compression_decompress(int type, struct list_head *ws, static int btrfs_decompress_bio(struct compressed_bio *cb); -static inline int compressed_bio_size(struct btrfs_fs_info *fs_info, - unsigned long disk_size) -{ - return sizeof(struct compressed_bio) + - (DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size; -} - -static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, - u64 disk_start) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - const u32 csum_size = fs_info->csum_size; - const u32 sectorsize = fs_info->sectorsize; - struct page *page; - unsigned int i; - u8 csum[BTRFS_CSUM_SIZE]; - struct compressed_bio *cb = bio->bi_private; - u8 *cb_sum = cb->sums; - - if ((inode->flags & BTRFS_INODE_NODATASUM) || - test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) - return 0; - - for (i = 0; i < cb->nr_pages; i++) { - u32 pg_offset; - u32 bytes_left = PAGE_SIZE; - page = cb->compressed_pages[i]; - - /* Determine the remaining bytes inside the page first */ - if (i == cb->nr_pages - 1) - bytes_left = cb->compressed_len - i * PAGE_SIZE; - - /* Hash through the page sector by sector */ - for (pg_offset = 0; pg_offset < bytes_left; - pg_offset += sectorsize) { - int ret; - - ret = btrfs_check_sector_csum(fs_info, page, pg_offset, - csum, cb_sum); - if (ret) { - btrfs_print_data_csum_error(inode, disk_start, - csum, cb_sum, cb->mirror_num); - if (btrfs_bio(bio)->device) - btrfs_dev_stat_inc_and_print( - btrfs_bio(bio)->device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - return -EIO; - } - cb_sum += csum_size; - disk_start += sectorsize; - } - } - return 0; -} - static void finish_compressed_bio_read(struct compressed_bio *cb) { unsigned int index; struct page *page; + if (cb->status == BLK_STS_OK) + cb->status = errno_to_blk_status(btrfs_decompress_bio(cb)); + /* Release the compressed pages */ for (index = 0; index < cb->nr_pages; index++) { page = cb->compressed_pages[index]; @@ -233,59 +181,54 @@ static void finish_compressed_bio_read(struct compressed_bio *cb) kfree(cb); } -/* when we finish reading compressed pages from the disk, we - * decompress them and then run the bio end_io routines on the - * decompressed pages (in the inode address space). - * - * This allows the checksumming and other IO error handling routines - * to work normally - * - * The compressed pages are freed here, and it must be run - * in process context +/* + * Verify the checksums and kick off repair if needed on the uncompressed data + * before decompressing it into the original bio and freeing the uncompressed + * pages. */ static void end_compressed_bio_read(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; - struct inode *inode; - unsigned int mirror = btrfs_bio(bio)->mirror_num; - int ret = 0; - - if (bio->bi_status) - cb->status = bio->bi_status; - - if (!refcount_dec_and_test(&cb->pending_ios)) - goto out; - - /* - * Record the correct mirror_num in cb->orig_bio so that - * read-repair can work properly. - */ - btrfs_bio(cb->orig_bio)->mirror_num = mirror; - cb->mirror_num = mirror; - - /* - * Some IO in this cb have failed, just skip checksum as there - * is no way it could be correct. - */ - if (cb->status != BLK_STS_OK) - goto csum_failed; + struct inode *inode = cb->inode; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_inode *bi = BTRFS_I(inode); + bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) && + !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); + blk_status_t status = bio->bi_status; + struct btrfs_bio *bbio = btrfs_bio(bio); + struct bvec_iter iter; + struct bio_vec bv; + u32 offset; + + btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) { + u64 start = bbio->file_offset + offset; + + if (!status && + (!csum || !btrfs_check_data_csum(inode, bbio, offset, + bv.bv_page, bv.bv_offset))) { + clean_io_failure(fs_info, &bi->io_failure_tree, + &bi->io_tree, start, bv.bv_page, + btrfs_ino(bi), bv.bv_offset); + } else { + int ret; - inode = cb->inode; - ret = check_compressed_csum(BTRFS_I(inode), bio, - bio->bi_iter.bi_sector << 9); - if (ret) - goto csum_failed; + refcount_inc(&cb->pending_ios); + ret = btrfs_repair_one_sector(inode, bbio, offset, + bv.bv_page, bv.bv_offset, + btrfs_submit_data_read_bio); + if (ret) { + refcount_dec(&cb->pending_ios); + status = errno_to_blk_status(ret); + } + } + } - /* ok, we're the last bio for this extent, lets start - * the decompression. - */ - ret = btrfs_decompress_bio(cb); + if (status) + cb->status = status; -csum_failed: - if (ret) - cb->status = errno_to_blk_status(ret); - finish_compressed_bio_read(cb); -out: + if (refcount_dec_and_test(&cb->pending_ios)) + finish_compressed_bio_read(cb); + btrfs_bio_free_csum(bbio); bio_put(bio); } @@ -478,7 +421,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && IS_ALIGNED(len, fs_info->sectorsize)); - cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); + cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS); if (!cb) return BLK_STS_RESOURCE; refcount_set(&cb->pending_ios, 1); @@ -486,7 +429,6 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->inode = &inode->vfs_inode; cb->start = start; cb->len = len; - cb->mirror_num = 0; cb->compressed_pages = compressed_pages; cb->compressed_len = compressed_len; cb->writeback = writeback; @@ -755,7 +697,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, blk_status_t ret; int ret2; int i; - u8 *sums; em_tree = &BTRFS_I(inode)->extent_tree; @@ -773,7 +714,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ASSERT(em->compress_type != BTRFS_COMPRESS_NONE); compressed_len = em->block_len; - cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); + cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS); if (!cb) { ret = BLK_STS_RESOURCE; goto out; @@ -782,8 +723,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, refcount_set(&cb->pending_ios, 1); cb->status = BLK_STS_OK; cb->inode = inode; - cb->mirror_num = mirror_num; - sums = cb->sums; cb->start = em->orig_start; em_len = em->len; @@ -866,19 +805,25 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, submit = true; if (submit) { - unsigned int nr_sectors; + /* Save the original iter for read repair */ + if (bio_op(comp_bio) == REQ_OP_READ) + btrfs_bio(comp_bio)->iter = comp_bio->bi_iter; + + /* + * Save the initial offset of this chunk, as there + * is no direct correlation between compressed pages and + * the original file offset. The field is only used for + * priting error messages. + */ + btrfs_bio(comp_bio)->file_offset = file_offset; - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); + ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL); if (ret) { comp_bio->bi_status = ret; bio_endio(comp_bio); break; } - nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size, - fs_info->sectorsize); - sums += fs_info->csum_size * nr_sectors; - ASSERT(comp_bio->bi_iter.bi_size); btrfs_submit_bio(fs_info, comp_bio, mirror_num); comp_bio = NULL; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0e4cbf04fd86..e9ef24034cad 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -59,19 +59,12 @@ struct compressed_bio { /* IO errors */ blk_status_t status; - int mirror_num; union { /* For reads, this is the bio we are copying the data into */ struct bio *orig_bio; struct work_struct write_end_work; }; - - /* - * the start of a variable length array of checksums only - * used by reads - */ - u8 sums[]; }; static inline unsigned int btrfs_compress_type(unsigned int type_level) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c567c73f7509..4db85b9dc7ed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3293,6 +3293,8 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, int mirror_num, enum btrfs_compression_type compress_type); int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, u32 pgoff, u8 *csum, const u8 * const csum_expected); +int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, u32 pgoff); unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u64 start, u64 end); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4baf5cac7b19..b290bd1b38b0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2545,13 +2545,10 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 start = bbio->file_offset + bio_offset; struct io_failure_record *failrec; - struct extent_map *em; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; const u32 sectorsize = fs_info->sectorsize; int ret; - u64 logical; failrec = get_state_failrec(failure_tree, start); if (!IS_ERR(failrec)) { @@ -2576,41 +2573,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode failrec->len = sectorsize; failrec->failed_mirror = bbio->mirror_num; failrec->this_mirror = bbio->mirror_num; - failrec->compress_type = BTRFS_COMPRESS_NONE; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, failrec->len); - if (!em) { - read_unlock(&em_tree->lock); - kfree(failrec); - return ERR_PTR(-EIO); - } - - if (em->start > start || em->start + em->len <= start) { - free_extent_map(em); - em = NULL; - } - read_unlock(&em_tree->lock); - if (!em) { - kfree(failrec); - return ERR_PTR(-EIO); - } - - logical = start - em->start; - logical = em->block_start + logical; - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - logical = em->block_start; - failrec->compress_type = em->compress_type; - } + failrec->logical = (bbio->iter.bi_sector << SECTOR_SHIFT) + bio_offset; btrfs_debug(fs_info, - "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", - logical, start, failrec->len); - - failrec->logical = logical; - free_extent_map(em); + "new io failure record logical %llu start %llu", + failrec->logical, start); - failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize); + failrec->num_copies = btrfs_num_copies(fs_info, failrec->logical, sectorsize); if (failrec->num_copies == 1) { /* * We only have a single copy of the data, so don't bother with @@ -2709,7 +2678,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio, * will be handled by the endio on the repair_bio, so we can't return an * error here. */ - submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->compress_type); + submit_bio_hook(inode, repair_bio, failrec->this_mirror, 0); return BLK_STS_OK; } @@ -3117,6 +3086,10 @@ static void end_bio_extent_readpage(struct bio *bio) * Only try to repair bios that actually made it to a * device. If the bio failed to be submitted mirror * is 0 and we need to fail it without retrying. + * + * This also includes the high level bios for compressed + * extents - these never make it to a device and repair + * is already handled on the lower compressed bio. */ if (mirror > 0) repair = true; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b802ac85cb74..4bc72a87b9a9 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -261,7 +261,6 @@ struct io_failure_record { u64 start; u64 len; u64 logical; - enum btrfs_compression_type compress_type; int this_mirror; int failed_mirror; int num_copies; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 18d397bfd28e..e8021d52c846 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2749,6 +2749,9 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, return; } + /* Save the original iter for read repair */ + btrfs_bio(bio)->iter = bio->bi_iter; + /* * Lookup bio sums does extra checks around whether we need to csum or * not, which is why we ignore skip_sum here. @@ -8060,6 +8063,10 @@ static void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, struct btrfs_dio_private *dip = bio->bi_private; blk_status_t ret; + /* Save the original iter for read repair */ + if (btrfs_op(bio) == BTRFS_MAP_READ) + btrfs_bio(bio)->iter = bio->bi_iter; + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) goto map; -- cgit From 0b078d9db8793b1bd911e97be854e3c964235c78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jul 2022 07:33:31 +0200 Subject: btrfs: don't call btrfs_page_set_checked in finish_compressed_bio_read This flag was used to communicate that the low-level compression code already did verify the checksum to the high-level I/O completion code. But it has been unused for a long time as the upper btrfs_bio for the decompressed data had a NULL csum pointer basically since that pointer existed and the code already checks for that a little later. Note that this does not affect the other use of the checked flag, which is only used for the COW fixup worker. Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/compression.c | 24 ++---------------------- fs/btrfs/inode.c | 5 ----- 2 files changed, 2 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8124cd3d0b6b..f3df9b9b4381 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -152,29 +152,9 @@ static void finish_compressed_bio_read(struct compressed_bio *cb) } /* Do io completion on the original bio */ - if (cb->status != BLK_STS_OK) { + if (cb->status != BLK_STS_OK) cb->orig_bio->bi_status = cb->status; - bio_endio(cb->orig_bio); - } else { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - /* - * We have verified the checksum already, set page checked so - * the end_io handlers know about it - */ - ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) { - u64 bvec_start = page_offset(bvec->bv_page) + - bvec->bv_offset; - - btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb), - bvec->bv_page, bvec_start, - bvec->bv_len); - } - - bio_endio(cb->orig_bio); - } + bio_endio(cb->orig_bio); /* Finally free the cb struct */ kfree(cb->compressed_pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e8021d52c846..ecc5fa3343fc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3504,11 +3504,6 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, u32 pg_off; unsigned int result = 0; - if (btrfs_page_test_checked(fs_info, page, start, end + 1 - start)) { - btrfs_page_clear_checked(fs_info, page, start, end + 1 - start); - return 0; - } - /* * This only happens for NODATASUM or compressed read. * Normally this should be covered by above check for compressed read -- cgit