Diffstat (limited to 'fs/btrfs/scrub.c')
| Mode | Path | Lines changed |
|------|------|---------------|
| -rw-r--r-- | fs/btrfs/scrub.c | 568 |

1 file changed, 384 insertions, 184 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b0251eb1239f..6f1e4c984b94 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -18,6 +18,7 @@ #include <linux/blkdev.h> #include <linux/ratelimit.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "volumes.h" #include "disk-io.h" @@ -64,7 +65,7 @@ struct scrub_ctx; #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ struct scrub_recover { - atomic_t refs; + refcount_t refs; struct btrfs_bio *bbio; u64 map_length; }; @@ -95,7 +96,7 @@ struct scrub_bio { struct scrub_ctx *sctx; struct btrfs_device *dev; struct bio *bio; - int err; + blk_status_t status; u64 logical; u64 physical; #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO @@ -112,7 +113,7 @@ struct scrub_block { struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; int page_count; atomic_t outstanding_pages; - atomic_t refs; /* free mem on transition to zero */ + refcount_t refs; /* free mem on transition to zero */ struct scrub_ctx *sctx; struct scrub_parity *sparity; struct { @@ -140,9 +141,9 @@ struct scrub_parity { int nsectors; - int stripe_len; + u64 stripe_len; - atomic_t refs; + refcount_t refs; struct list_head spages; @@ -161,14 +162,6 @@ struct scrub_parity { unsigned long bitmap[0]; }; -struct scrub_wr_ctx { - struct scrub_bio *wr_curr_bio; - struct btrfs_device *tgtdev; - int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ - atomic_t flush_all_writes; - struct mutex wr_lock; -}; - struct scrub_ctx { struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; struct btrfs_fs_info *fs_info; @@ -183,11 +176,14 @@ struct scrub_ctx { atomic_t cancel_req; int readonly; int pages_per_rd_bio; - u32 sectorsize; - u32 nodesize; int is_dev_replace; - struct scrub_wr_ctx wr_ctx; + + struct scrub_bio *wr_curr_bio; + struct mutex wr_lock; + int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ + atomic_t flush_all_writes; + struct btrfs_device *wr_tgtdev; /* * statistics @@ -202,7 +198,7 @@ struct scrub_ctx { * doesn't free the scrub context before or while the workers are * doing the wakeup() call. */ - atomic_t refs; + refcount_t refs; }; struct scrub_fixup_nodatasum { @@ -240,6 +236,13 @@ struct scrub_warning { struct btrfs_device *dev; }; +struct full_stripe_lock { + struct rb_node node; + u64 logical; + u64 refs; + struct mutex mutex; +}; + static void scrub_pending_bio_inc(struct scrub_ctx *sctx); static void scrub_pending_bio_dec(struct scrub_ctx *sctx); static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); @@ -282,10 +285,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, u64 *extent_physical, struct btrfs_device **extent_dev, int *extent_mirror_num); -static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, - struct btrfs_device *dev, - int is_dev_replace); -static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static void scrub_wr_submit(struct scrub_ctx *sctx); @@ -305,7 +304,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx); static void scrub_pending_bio_inc(struct scrub_ctx *sctx) { - atomic_inc(&sctx->refs); + refcount_inc(&sctx->refs); atomic_inc(&sctx->bios_in_flight); } @@ -349,6 +348,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) } /* + * Insert new full stripe lock into full stripe locks tree + * + * Return pointer to existing or newly inserted full_stripe_lock structure if + * everything works well. 
+ * Return ERR_PTR(-ENOMEM) if we failed to allocate memory + * + * NOTE: caller must hold full_stripe_locks_root->lock before calling this + * function + */ +static struct full_stripe_lock *insert_full_stripe_lock( + struct btrfs_full_stripe_locks_tree *locks_root, + u64 fstripe_logical) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct full_stripe_lock *entry; + struct full_stripe_lock *ret; + + WARN_ON(!mutex_is_locked(&locks_root->lock)); + + p = &locks_root->root.rb_node; + while (*p) { + parent = *p; + entry = rb_entry(parent, struct full_stripe_lock, node); + if (fstripe_logical < entry->logical) { + p = &(*p)->rb_left; + } else if (fstripe_logical > entry->logical) { + p = &(*p)->rb_right; + } else { + entry->refs++; + return entry; + } + } + + /* Insert new lock */ + ret = kmalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return ERR_PTR(-ENOMEM); + ret->logical = fstripe_logical; + ret->refs = 1; + mutex_init(&ret->mutex); + + rb_link_node(&ret->node, parent, p); + rb_insert_color(&ret->node, &locks_root->root); + return ret; +} + +/* + * Search for a full stripe lock of a block group + * + * Return pointer to existing full stripe lock if found + * Return NULL if not found + */ +static struct full_stripe_lock *search_full_stripe_lock( + struct btrfs_full_stripe_locks_tree *locks_root, + u64 fstripe_logical) +{ + struct rb_node *node; + struct full_stripe_lock *entry; + + WARN_ON(!mutex_is_locked(&locks_root->lock)); + + node = locks_root->root.rb_node; + while (node) { + entry = rb_entry(node, struct full_stripe_lock, node); + if (fstripe_logical < entry->logical) + node = node->rb_left; + else if (fstripe_logical > entry->logical) + node = node->rb_right; + else + return entry; + } + return NULL; +} + +/* + * Helper to get full stripe logical from a normal bytenr. + * + * Caller must ensure @cache is a RAID56 block group. + */ +static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache, + u64 bytenr) +{ + u64 ret; + + /* + * Due to chunk item size limit, full stripe length should not be + * larger than U32_MAX. Just a sanity check here. + */ + WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX); + + /* + * round_down() can only handle power of 2, while RAID56 full + * stripe length can be 64KiB * n, so we need to manually round down. + */ + ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) * + cache->full_stripe_len + cache->key.objectid; + return ret; +} + +/* + * Lock a full stripe to avoid concurrency of recovery and read + * + * It's only used for profiles with parities (RAID5/6), for other profiles it + * does nothing. + * + * Return 0 if we locked full stripe covering @bytenr, with a mutex held. + * So caller must call unlock_full_stripe() at the same context. + * + * Return <0 if encounters error. 
+ */ +static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, + bool *locked_ret) +{ + struct btrfs_block_group_cache *bg_cache; + struct btrfs_full_stripe_locks_tree *locks_root; + struct full_stripe_lock *existing; + u64 fstripe_start; + int ret = 0; + + *locked_ret = false; + bg_cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!bg_cache) { + ASSERT(0); + return -ENOENT; + } + + /* Profiles not based on parity don't need full stripe lock */ + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) + goto out; + locks_root = &bg_cache->full_stripe_locks_root; + + fstripe_start = get_full_stripe_logical(bg_cache, bytenr); + + /* Now insert the full stripe lock */ + mutex_lock(&locks_root->lock); + existing = insert_full_stripe_lock(locks_root, fstripe_start); + mutex_unlock(&locks_root->lock); + if (IS_ERR(existing)) { + ret = PTR_ERR(existing); + goto out; + } + mutex_lock(&existing->mutex); + *locked_ret = true; +out: + btrfs_put_block_group(bg_cache); + return ret; +} + +/* + * Unlock a full stripe. + * + * NOTE: Caller must ensure it's the same context calling corresponding + * lock_full_stripe(). + * + * Return 0 if we unlock full stripe without problem. + * Return <0 for error + */ +static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, + bool locked) +{ + struct btrfs_block_group_cache *bg_cache; + struct btrfs_full_stripe_locks_tree *locks_root; + struct full_stripe_lock *fstripe_lock; + u64 fstripe_start; + bool freeit = false; + int ret = 0; + + /* If we didn't acquire full stripe lock, no need to continue */ + if (!locked) + return 0; + + bg_cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!bg_cache) { + ASSERT(0); + return -ENOENT; + } + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) + goto out; + + locks_root = &bg_cache->full_stripe_locks_root; + fstripe_start = get_full_stripe_logical(bg_cache, bytenr); + + mutex_lock(&locks_root->lock); + fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start); + /* Unpaired unlock_full_stripe() detected */ + if (!fstripe_lock) { + WARN_ON(1); + ret = -ENOENT; + mutex_unlock(&locks_root->lock); + goto out; + } + + if (fstripe_lock->refs == 0) { + WARN_ON(1); + btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow", + fstripe_lock->logical); + } else { + fstripe_lock->refs--; + } + + if (fstripe_lock->refs == 0) { + rb_erase(&fstripe_lock->node, &locks_root->root); + freeit = true; + } + mutex_unlock(&locks_root->lock); + + mutex_unlock(&fstripe_lock->mutex); + if (freeit) + kfree(fstripe_lock); +out: + btrfs_put_block_group(bg_cache); + return ret; +} + +/* * used for workers that require transaction commits (i.e., for the * NOCOW case) */ @@ -356,7 +571,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) { struct btrfs_fs_info *fs_info = sctx->fs_info; - atomic_inc(&sctx->refs); + refcount_inc(&sctx->refs); /* * increment scrubs_running to prevent cancel requests from * completing as long as a worker is running. 
we must also @@ -420,8 +635,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) if (!sctx) return; - scrub_free_wr_ctx(&sctx->wr_ctx); - /* this can happen when scrub is cancelled */ if (sctx->curr != -1) { struct scrub_bio *sbio = sctx->bios[sctx->curr]; @@ -441,13 +654,14 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) kfree(sbio); } + kfree(sctx->wr_curr_bio); scrub_free_csums(sctx); kfree(sctx); } static void scrub_put_ctx(struct scrub_ctx *sctx) { - if (atomic_dec_and_test(&sctx->refs)) + if (refcount_dec_and_test(&sctx->refs)) scrub_free_ctx(sctx); } @@ -457,12 +671,11 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) struct scrub_ctx *sctx; int i; struct btrfs_fs_info *fs_info = dev->fs_info; - int ret; sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); if (!sctx) goto nomem; - atomic_set(&sctx->refs, 1); + refcount_set(&sctx->refs, 1); sctx->is_dev_replace = is_dev_replace; sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO; sctx->curr = -1; @@ -487,8 +700,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sctx->bios[i]->next_free = -1; } sctx->first_free = 0; - sctx->nodesize = fs_info->nodesize; - sctx->sectorsize = fs_info->sectorsize; atomic_set(&sctx->bios_in_flight, 0); atomic_set(&sctx->workers_pending, 0); atomic_set(&sctx->cancel_req, 0); @@ -499,12 +710,16 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) spin_lock_init(&sctx->stat_lock); init_waitqueue_head(&sctx->list_wait); - ret = scrub_setup_wr_ctx(&sctx->wr_ctx, - fs_info->dev_replace.tgtdev, is_dev_replace); - if (ret) { - scrub_free_ctx(sctx); - return ERR_PTR(ret); + WARN_ON(sctx->wr_curr_bio != NULL); + mutex_init(&sctx->wr_lock); + sctx->wr_curr_bio = NULL; + if (is_dev_replace) { + WARN_ON(!fs_info->dev_replace.tgtdev); + sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; + sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; + atomic_set(&sctx->flush_all_writes, 0); } + return sctx; nomem: @@ -519,6 +734,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, u32 nlink; int ret; int i; + unsigned nofs_flag; struct extent_buffer *eb; struct btrfs_inode_item *inode_item; struct scrub_warning *swarn = warn_ctx; @@ -557,7 +773,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, nlink = btrfs_inode_nlink(eb, inode_item); btrfs_release_path(swarn->path); + /* + * init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub + * uses GFP_NOFS in this context, so we keep it consistent but it does + * not seem to be strictly necessary. 
+ */ + nofs_flag = memalloc_nofs_save(); ipath = init_ipath(4096, local_root, swarn->path); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(ipath)) { ret = PTR_ERR(ipath); ipath = NULL; @@ -731,7 +954,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) ret = -EIO; goto out; } - ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE, + ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE, fixup->logical, page, offset - page_offset(page), fixup->mirror_num); @@ -857,12 +1080,14 @@ out: static inline void scrub_get_recover(struct scrub_recover *recover) { - atomic_inc(&recover->refs); + refcount_inc(&recover->refs); } -static inline void scrub_put_recover(struct scrub_recover *recover) +static inline void scrub_put_recover(struct btrfs_fs_info *fs_info, + struct scrub_recover *recover) { - if (atomic_dec_and_test(&recover->refs)) { + if (refcount_dec_and_test(&recover->refs)) { + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(recover->bbio); kfree(recover); } @@ -892,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) int mirror_index; int page_num; int success; + bool full_stripe_locked; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -917,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) have_csum = sblock_to_check->pagev[0]->have_csum; dev = sblock_to_check->pagev[0]->dev; + /* + * For RAID5/6, race can happen for a different device scrub thread. + * For data corruption, Parity and Data threads will both try + * to recovery the data. + * Race can lead to doubly added csum error, or even unrecoverable + * error. + */ + ret = lock_full_stripe(fs_info, logical, &full_stripe_locked); + if (ret < 0) { + spin_lock(&sctx->stat_lock); + if (ret == -ENOMEM) + sctx->stat.malloc_errors++; + sctx->stat.read_errors++; + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); + return ret; + } + if (sctx->is_dev_replace && !is_metadata && !have_csum) { sblocks_for_recheck = NULL; goto nodatasum_case; @@ -1241,7 +1485,7 @@ out: sblock->pagev[page_index]->sblock = NULL; recover = sblock->pagev[page_index]->recover; if (recover) { - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); sblock->pagev[page_index]->recover = NULL; } @@ -1251,6 +1495,9 @@ out: kfree(sblocks_for_recheck); } + ret = unlock_full_stripe(fs_info, logical, full_stripe_locked); + if (ret < 0) + return ret; return 0; } @@ -1330,20 +1577,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, * with a length of PAGE_SIZE, each returned stripe * represents one mirror */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &mapped_length, &bbio, 0, 1); + logical, &mapped_length, &bbio); if (ret || !bbio || mapped_length < sublen) { btrfs_put_bbio(bbio); + btrfs_bio_counter_dec(fs_info); return -EIO; } recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); if (!recover) { btrfs_put_bbio(bbio); + btrfs_bio_counter_dec(fs_info); return -ENOMEM; } - atomic_set(&recover->refs, 1); + refcount_set(&recover->refs, 1); recover->bbio = bbio; recover->map_length = mapped_length; @@ -1365,7 +1615,7 @@ leave_nomem: spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); return -ENOMEM; } scrub_page_get(page); @@ -1407,7 +1657,7 @@ leave_nomem: scrub_get_recover(recover); page->recover 
= recover; } - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); length -= sublen; logical += sublen; page_index++; @@ -1418,14 +1668,14 @@ leave_nomem: struct scrub_bio_ret { struct completion event; - int error; + blk_status_t status; }; static void scrub_bio_wait_endio(struct bio *bio) { struct scrub_bio_ret *ret = bio->bi_private; - ret->error = bio->bi_error; + ret->status = bio->bi_status; complete(&ret->event); } @@ -1443,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, int ret; init_completion(&done.event); - done.error = 0; + done.status = 0; bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_private = &done; bio->bi_end_io = scrub_bio_wait_endio; @@ -1455,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, return ret; wait_for_completion(&done.event); - if (done.error) + if (done.status) return -EIO; return 0; @@ -1487,24 +1737,23 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } WARN_ON(!page->page); - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) { - page->io_error = 1; - sblock->no_io_error_seen = 0; - continue; - } + bio = btrfs_io_bio_alloc(1); bio->bi_bdev = page->dev->bdev; bio_add_page(bio, page->page, PAGE_SIZE, 0); if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { - if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) + if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) { + page->io_error = 1; sblock->no_io_error_seen = 0; + } } else { bio->bi_iter.bi_sector = page->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (btrfsic_submit_bio_wait(bio)) + if (btrfsic_submit_bio_wait(bio)) { + page->io_error = 1; sblock->no_io_error_seen = 0; + } } bio_put(bio); @@ -1576,9 +1825,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; } - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) - return -EIO; + bio = btrfs_io_bio_alloc(1); bio->bi_bdev = page_bad->dev->bdev; bio->bi_iter.bi_sector = page_bad->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -1634,7 +1881,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, if (spage->io_error) { void *mapped_buffer = kmap_atomic(spage->page); - memset(mapped_buffer, 0, PAGE_SIZE); + clear_page(mapped_buffer); flush_dcache_page(spage->page); kunmap_atomic(mapped_buffer); } @@ -1644,37 +1891,31 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage) { - struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; struct scrub_bio *sbio; int ret; - mutex_lock(&wr_ctx->wr_lock); + mutex_lock(&sctx->wr_lock); again: - if (!wr_ctx->wr_curr_bio) { - wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), + if (!sctx->wr_curr_bio) { + sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio), GFP_KERNEL); - if (!wr_ctx->wr_curr_bio) { - mutex_unlock(&wr_ctx->wr_lock); + if (!sctx->wr_curr_bio) { + mutex_unlock(&sctx->wr_lock); return -ENOMEM; } - wr_ctx->wr_curr_bio->sctx = sctx; - wr_ctx->wr_curr_bio->page_count = 0; + sctx->wr_curr_bio->sctx = sctx; + sctx->wr_curr_bio->page_count = 0; } - sbio = wr_ctx->wr_curr_bio; + sbio = sctx->wr_curr_bio; if (sbio->page_count == 0) { struct bio *bio; sbio->physical = spage->physical_for_dev_replace; sbio->logical = spage->logical; - sbio->dev = wr_ctx->tgtdev; + sbio->dev = sctx->wr_tgtdev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(GFP_KERNEL, - wr_ctx->pages_per_wr_bio); - if (!bio) { - 
mutex_unlock(&wr_ctx->wr_lock); - return -ENOMEM; - } + bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio); sbio->bio = bio; } @@ -1683,7 +1924,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -1697,7 +1938,7 @@ again: if (sbio->page_count < 1) { bio_put(sbio->bio); sbio->bio = NULL; - mutex_unlock(&wr_ctx->wr_lock); + mutex_unlock(&sctx->wr_lock); return -EIO; } scrub_wr_submit(sctx); @@ -1707,23 +1948,22 @@ again: sbio->pagev[sbio->page_count] = spage; scrub_page_get(spage); sbio->page_count++; - if (sbio->page_count == wr_ctx->pages_per_wr_bio) + if (sbio->page_count == sctx->pages_per_wr_bio) scrub_wr_submit(sctx); - mutex_unlock(&wr_ctx->wr_lock); + mutex_unlock(&sctx->wr_lock); return 0; } static void scrub_wr_submit(struct scrub_ctx *sctx) { - struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; struct scrub_bio *sbio; - if (!wr_ctx->wr_curr_bio) + if (!sctx->wr_curr_bio) return; - sbio = wr_ctx->wr_curr_bio; - wr_ctx->wr_curr_bio = NULL; + sbio = sctx->wr_curr_bio; + sctx->wr_curr_bio = NULL; WARN_ON(!sbio->bio->bi_bdev); scrub_pending_bio_inc(sctx); /* process all writes in a single worker thread. Then the block layer @@ -1738,7 +1978,7 @@ static void scrub_wr_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, @@ -1753,7 +1993,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) int i; WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); - if (sbio->err) { + if (sbio->status) { struct btrfs_dev_replace *dev_replace = &sbio->sctx->fs_info->dev_replace; @@ -1827,7 +2067,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) page = sblock->pagev[0]->page; buffer = kmap_atomic(page); - len = sctx->sectorsize; + len = sctx->fs_info->sectorsize; index = 0; for (;;) { u64 l = min_t(u64, len, PAGE_SIZE); @@ -1892,7 +2132,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) BTRFS_UUID_SIZE)) sblock->header_error = 1; - len = sctx->nodesize - BTRFS_CSUM_SIZE; + len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; index = 0; @@ -1998,12 +2238,12 @@ static int scrub_checksum_super(struct scrub_block *sblock) static void scrub_block_get(struct scrub_block *sblock) { - atomic_inc(&sblock->refs); + refcount_inc(&sblock->refs); } static void scrub_block_put(struct scrub_block *sblock) { - if (atomic_dec_and_test(&sblock->refs)) { + if (refcount_dec_and_test(&sblock->refs)) { int i; if (sblock->sparity) @@ -2075,10 +2315,7 @@ again: sbio->dev = spage->dev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(GFP_KERNEL, - sctx->pages_per_rd_bio); - if (!bio) - return -ENOMEM; + bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio); sbio->bio = bio; } @@ -2087,7 +2324,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -2123,7 +2360,7 @@ static void scrub_missing_raid56_end_io(struct bio 
*bio) struct scrub_block *sblock = bio->bi_private; struct btrfs_fs_info *fs_info = sblock->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) sblock->no_io_error_seen = 0; bio_put(bio); @@ -2166,10 +2403,10 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) scrub_block_put(sblock); if (sctx->is_dev_replace && - atomic_read(&sctx->wr_ctx.flush_all_writes)) { - mutex_lock(&sctx->wr_ctx.wr_lock); + atomic_read(&sctx->flush_all_writes)) { + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); } scrub_pending_bio_dec(sctx); @@ -2187,8 +2424,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) int ret; int i; + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio, 0, 1); + &length, &bbio); if (ret || !bbio || !bbio->raid_map) goto bbio_out; @@ -2203,10 +2441,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) goto bbio_out; } - bio = btrfs_io_bio_alloc(GFP_NOFS, 0); - if (!bio) - goto bbio_out; - + bio = btrfs_io_bio_alloc(0); bio->bi_iter.bi_sector = logical >> 9; bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; @@ -2231,6 +2466,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) rbio_out: bio_put(bio); bbio_out: + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(bbio); spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2255,7 +2491,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, /* one ref inside this function, plus one for each page added to * a bio later on */ - atomic_set(&sblock->refs, 1); + refcount_set(&sblock->refs, 1); sblock->sctx = sctx; sblock->no_io_error_seen = 1; @@ -2332,7 +2568,7 @@ static void scrub_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_queue_work(fs_info->scrub_workers, &sbio->work); @@ -2345,7 +2581,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) int i; BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); - if (sbio->err) { + if (sbio->status) { for (i = 0; i < sbio->page_count; i++) { struct scrub_page *spage = sbio->pagev[i]; @@ -2372,10 +2608,10 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) spin_unlock(&sctx->list_lock); if (sctx->is_dev_replace && - atomic_read(&sctx->wr_ctx.flush_all_writes)) { - mutex_lock(&sctx->wr_ctx.wr_lock); + atomic_read(&sctx->flush_all_writes)) { + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); } scrub_pending_bio_dec(sctx); @@ -2385,7 +2621,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, unsigned long *bitmap, u64 start, u64 len) { - u32 offset; + u64 offset; int nsectors; int sectorsize = sparity->sctx->fs_info->sectorsize; @@ -2395,8 +2631,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, } start -= sparity->logic_start; - start = div_u64_rem(start, sparity->stripe_len, &offset); - offset /= sectorsize; + start = div64_u64_rem(start, sparity->stripe_len, &offset); + offset = div_u64(offset, sectorsize); nsectors = (int)len / sectorsize; if (offset + nsectors <= sparity->nsectors) { @@ -2470,8 +2706,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) if (!sum) return 0; - index = ((u32)(logical - sum->bytenr)) / 
sctx->sectorsize; - num_sectors = sum->len / sctx->sectorsize; + index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize; + num_sectors = sum->len / sctx->fs_info->sectorsize; memcpy(csum, sum->sums + index, sctx->csum_size); if (index == num_sectors - 1) { list_del(&sum->list); @@ -2490,19 +2726,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, u32 blocksize; if (flags & BTRFS_EXTENT_FLAG_DATA) { - blocksize = sctx->sectorsize; + blocksize = sctx->fs_info->sectorsize; spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed++; sctx->stat.data_bytes_scrubbed += len; spin_unlock(&sctx->stat_lock); } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - blocksize = sctx->nodesize; + blocksize = sctx->fs_info->nodesize; spin_lock(&sctx->stat_lock); sctx->stat.tree_extents_scrubbed++; sctx->stat.tree_bytes_scrubbed += len; spin_unlock(&sctx->stat_lock); } else { - blocksize = sctx->sectorsize; + blocksize = sctx->fs_info->sectorsize; WARN_ON(1); } @@ -2555,7 +2791,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, /* one ref inside this function, plus one for each page added to * a bio later on */ - atomic_set(&sblock->refs, 1); + refcount_set(&sblock->refs, 1); sblock->sctx = sctx; sblock->no_io_error_seen = 1; sblock->sparity = sparity; @@ -2636,11 +2872,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, } if (flags & BTRFS_EXTENT_FLAG_DATA) { - blocksize = sctx->sectorsize; + blocksize = sctx->fs_info->sectorsize; } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - blocksize = sctx->nodesize; + blocksize = sctx->fs_info->nodesize; } else { - blocksize = sctx->sectorsize; + blocksize = sctx->fs_info->sectorsize; WARN_ON(1); } @@ -2694,7 +2930,7 @@ static int get_raid56_logic_offset(u64 physical, int num, for (i = 0; i < nr_data_stripes(map); i++) { *offset = last_offset + i * map->stripe_len; - stripe_nr = div_u64(*offset, map->stripe_len); + stripe_nr = div64_u64(*offset, map->stripe_len); stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); /* Work out the disk rotation on this stripe-set */ @@ -2748,7 +2984,7 @@ static void scrub_parity_bio_endio(struct bio *bio) struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; struct btrfs_fs_info *fs_info = sparity->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); @@ -2765,7 +3001,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) struct btrfs_fs_info *fs_info = sctx->fs_info; struct bio *bio; struct btrfs_raid_bio *rbio; - struct scrub_page *spage; struct btrfs_bio *bbio = NULL; u64 length; int ret; @@ -2775,15 +3010,14 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) goto out; length = sparity->logic_end - sparity->logic_start; + + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, - &length, &bbio, 0, 1); + &length, &bbio); if (ret || !bbio || !bbio->raid_map) goto bbio_out; - bio = btrfs_io_bio_alloc(GFP_NOFS, 0); - if (!bio) - goto bbio_out; - + bio = btrfs_io_bio_alloc(0); bio->bi_iter.bi_sector = sparity->logic_start >> 9; bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; @@ -2795,9 +3029,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) if (!rbio) goto rbio_out; - list_for_each_entry(spage, &sparity->spages, list) - raid56_add_scrub_pages(rbio, spage->page, spage->logical); - 
scrub_pending_bio_inc(sctx); raid56_parity_submit_scrub_rbio(rbio); return; @@ -2805,6 +3036,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) rbio_out: bio_put(bio); bbio_out: + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(bbio); bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); @@ -2822,12 +3054,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors) static void scrub_parity_get(struct scrub_parity *sparity) { - atomic_inc(&sparity->refs); + refcount_inc(&sparity->refs); } static void scrub_parity_put(struct scrub_parity *sparity) { - if (!atomic_dec_and_test(&sparity->refs)) + if (!refcount_dec_and_test(&sparity->refs)) return; scrub_parity_check_and_repair(sparity); @@ -2879,7 +3111,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, sparity->scrub_dev = sdev; sparity->logic_start = logic_start; sparity->logic_end = logic_end; - atomic_set(&sparity->refs, 1); + refcount_set(&sparity->refs, 1); INIT_LIST_HEAD(&sparity->spages); sparity->dbitmap = sparity->bitmap; sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; @@ -3050,9 +3282,9 @@ out: logic_end - logic_start); scrub_parity_put(sparity); scrub_submit(sctx); - mutex_lock(&sctx->wr_ctx.wr_lock); + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); btrfs_release_path(path); return ret < 0 ? ret : 0; @@ -3098,7 +3330,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, physical = map->stripes[num].physical; offset = 0; - nstripes = div_u64(length, map->stripe_len); + nstripes = div64_u64(length, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { offset = map->stripe_len * num; increment = map->stripe_len * map->num_stripes; @@ -3208,14 +3440,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, */ if (atomic_read(&fs_info->scrub_pause_req)) { /* push queued extents */ - atomic_set(&sctx->wr_ctx.flush_all_writes, 1); + atomic_set(&sctx->flush_all_writes, 1); scrub_submit(sctx); - mutex_lock(&sctx->wr_ctx.wr_lock); + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); - atomic_set(&sctx->wr_ctx.flush_all_writes, 0); + atomic_set(&sctx->flush_all_writes, 0); scrub_blocked_if_needed(fs_info); } @@ -3422,9 +3654,9 @@ skip: out: /* push queued extents */ scrub_submit(sctx); - mutex_lock(&sctx->wr_ctx.wr_lock); + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); blk_finish_plug(&plug); btrfs_free_path(path); @@ -3604,7 +3836,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ btrfs_wait_block_group_reservations(cache); btrfs_wait_nocow_writers(cache); - ret = btrfs_wait_ordered_roots(fs_info, -1, + ret = btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->key.objectid, cache->key.offset); if (ret > 0) { @@ -3661,11 +3893,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * write requests are really completed when bios_in_flight * changes to 0. 
*/ - atomic_set(&sctx->wr_ctx.flush_all_writes, 1); + atomic_set(&sctx->flush_all_writes, 1); scrub_submit(sctx); - mutex_lock(&sctx->wr_ctx.wr_lock); + mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); - mutex_unlock(&sctx->wr_ctx.wr_lock); + mutex_unlock(&sctx->wr_lock); wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); @@ -3679,7 +3911,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, */ wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0); - atomic_set(&sctx->wr_ctx.flush_all_writes, 0); + atomic_set(&sctx->flush_all_writes, 0); scrub_pause_off(fs_info); @@ -4082,32 +4314,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, btrfs_put_bbio(bbio); } -static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx, - struct btrfs_device *dev, - int is_dev_replace) -{ - WARN_ON(wr_ctx->wr_curr_bio != NULL); - - mutex_init(&wr_ctx->wr_lock); - wr_ctx->wr_curr_bio = NULL; - if (!is_dev_replace) - return 0; - - WARN_ON(!dev->bdev); - wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; - wr_ctx->tgtdev = dev; - atomic_set(&wr_ctx->flush_all_writes, 0); - return 0; -} - -static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) -{ - mutex_lock(&wr_ctx->wr_lock); - kfree(wr_ctx->wr_curr_bio); - wr_ctx->wr_curr_bio = NULL; - mutex_unlock(&wr_ctx->wr_lock); -} - static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, int mirror_num, u64 physical_for_dev_replace) { @@ -4410,7 +4616,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, struct btrfs_device *dev; int ret; - dev = sctx->wr_ctx.tgtdev; + dev = sctx->wr_tgtdev; if (!dev) return -EIO; if (!dev->bdev) { @@ -4418,13 +4624,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, "scrub write_page_nocow(bdev == NULL) is unexpected"); return -EIO; } - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); - if (!bio) { - spin_lock(&sctx->stat_lock); - sctx->stat.malloc_errors++; - spin_unlock(&sctx->stat_lock); - return -ENOMEM; - } + bio = btrfs_io_bio_alloc(1); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; |
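The patch's get_full_stripe_logical() rounds a logical address down to the start of its RAID56 full stripe by hand, because the full stripe length (stripe_len * nr_data_stripes) is generally not a power of two and round_down() only handles power-of-two alignments. Below is a minimal userspace sketch of the same arithmetic; the block group start, stripe length and bytenr are made-up example values, not taken from the patch.

    /* Standalone illustration of the round-down done by
     * get_full_stripe_logical(); all numbers are hypothetical. */
    #include <stdio.h>
    #include <stdint.h>

    static uint64_t full_stripe_start(uint64_t bg_start, uint64_t full_stripe_len,
                                      uint64_t bytenr)
    {
            /* Integer division rounds down even when full_stripe_len is not a
             * power of two (e.g. 3 data stripes * 64KiB = 192KiB). */
            return (bytenr - bg_start) / full_stripe_len * full_stripe_len + bg_start;
    }

    int main(void)
    {
            uint64_t bg_start = 1024ULL * 1024 * 1024;  /* example chunk start: 1GiB */
            uint64_t full_len = 3 * 64 * 1024;          /* 3 data stripes of 64KiB */
            uint64_t bytenr   = bg_start + 500 * 1024;  /* 500KiB into the chunk */

            printf("full stripe of %llu starts at %llu\n",
                   (unsigned long long)bytenr,
                   (unsigned long long)full_stripe_start(bg_start, full_len, bytenr));
            return 0;
    }

In the kernel the same division is performed with div64_u64(), since full_stripe_len is a 64-bit value; that is also why this patch switches several div_u64() calls over to div64_u64() once sparity->stripe_len becomes u64.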
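The new lock_full_stripe()/unlock_full_stripe() pair serializes RAID5/6 recovery of one full stripe across scrub threads: a refcounted entry keyed by the full stripe's logical start is looked up (or inserted) in a per-block-group tree under locks_root->lock, and the caller then sleeps on that entry's mutex. The sketch below models the same pattern in userspace with pthreads; the linked list stands in for the kernel rb-tree, and all names here are illustrative simplifications, not kernel API.

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct stripe_lock {
            struct stripe_lock *next;
            uint64_t logical;        /* full stripe start this entry covers */
            unsigned int refs;       /* threads holding or waiting for it */
            pthread_mutex_t mutex;   /* serializes recovery of this stripe */
    };

    static struct stripe_lock *locks;               /* rb-tree in the kernel */
    static pthread_mutex_t locks_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Find or insert the entry for one full stripe, then block on its mutex. */
    static struct stripe_lock *lock_stripe(uint64_t fstripe_logical)
    {
            struct stripe_lock *l;

            pthread_mutex_lock(&locks_lock);
            for (l = locks; l; l = l->next)
                    if (l->logical == fstripe_logical)
                            break;
            if (l) {
                    l->refs++;
            } else {
                    l = calloc(1, sizeof(*l));
                    if (!l) {
                            pthread_mutex_unlock(&locks_lock);
                            return NULL;            /* ERR_PTR(-ENOMEM) in the kernel */
                    }
                    l->logical = fstripe_logical;
                    l->refs = 1;
                    pthread_mutex_init(&l->mutex, NULL);
                    l->next = locks;
                    locks = l;
            }
            pthread_mutex_unlock(&locks_lock);

            /* Taken after dropping locks_lock, just as lock_full_stripe() takes
             * the per-stripe mutex outside locks_root->lock, so waiters never
             * block lookups of other stripes. */
            pthread_mutex_lock(&l->mutex);
            return l;
    }

    /* Drop the reference taken by lock_stripe(); free the entry on last put. */
    static void unlock_stripe(struct stripe_lock *l)
    {
            int freeit = 0;

            pthread_mutex_lock(&locks_lock);
            if (--l->refs == 0) {
                    struct stripe_lock **p = &locks;

                    while (*p != l)
                            p = &(*p)->next;
                    *p = l->next;       /* rb_erase() in the kernel */
                    freeit = 1;
            }
            pthread_mutex_unlock(&locks_lock);

            pthread_mutex_unlock(&l->mutex);
            if (freeit) {
                    pthread_mutex_destroy(&l->mutex);
                    free(l);
            }
    }

    int main(void)
    {
            /* Hypothetical full stripe start, e.g. from full_stripe_start() above. */
            struct stripe_lock *l = lock_stripe(64ULL * 1024 * 1024);

            /* ... recovery of this full stripe would happen here ... */
            if (l)
                    unlock_stripe(l);
            return 0;
    }

In the patch the equivalent pairing lives in scrub_handle_errored_block(): lock_full_stripe() sets *locked_ret only when the block group is RAID5/6 and the per-stripe mutex was actually taken, and unlock_full_stripe() must be called from the same context with that flag so non-parity profiles skip the whole mechanism.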