aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2023-03-29 14:54:57 +0800
committerDavid Sterba <dsterba@suse.com>2023-04-17 18:01:24 +0200
commit13a62fd997f0649d221afc73e301c95c76560506 (patch)
tree340d01a1a1432517672a806841e46ae27c29d0b1 /fs/btrfs/scrub.c
parent001e3fc263ce96eaebc640350a8650a682249cb2 (diff)
btrfs: scrub: remove scrub_bio structure
Since scrub path has been fully moved to scrub_stripe based facilities, no more scrub_bio would be submitted. Thus we can remove it completely, this involves: - SCRUB_SECTORS_PER_BIO macro - SCRUB_BIOS_PER_SCTX macro - SCRUB_MAX_PAGES macro - BTRFS_MAX_MIRRORS macro - scrub_bio structure - scrub_ctx::bios member - scrub_ctx::curr member - scrub_ctx::bios_in_flight member - scrub_ctx::workers_pending member - scrub_ctx::list_lock member - scrub_ctx::list_wait member - function scrub_bio_end_io_worker() - function scrub_pending_bio_inc() - function scrub_pending_bio_dec() - function scrub_throttle() - function scrub_submit() - function scrub_find_csum() - function drop_csum_range() - Some unnecessary flush and scrub pauses Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c245
1 files changed, 6 insertions, 239 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 021dcec6f194..41cba9646931 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -41,14 +41,10 @@
struct scrub_ctx;
/*
- * The following three values only influence the performance.
+ * The following value only influences the performance.
*
- * The last one configures the number of parallel and outstanding I/O
- * operations. The first one configures an upper limit for the number
- * of (dynamically allocated) pages that are added to a bio.
+ * This determines the batch size for stripe submitted in one go.
*/
-#define SCRUB_SECTORS_PER_BIO 32 /* 128KiB per bio for 4KiB pages */
-#define SCRUB_BIOS_PER_SCTX 64 /* 8MiB per device in flight for 4KiB pages */
#define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64K stripe per-device. */
/*
@@ -57,19 +53,6 @@ struct scrub_ctx;
*/
#define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
-#define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE))
-
-/*
- * Maximum number of mirrors that can be available for all profiles counting
- * the target device of dev-replace as one. During an active device replace
- * procedure, the target device of the copy operation is a mirror for the
- * filesystem data as well that can be used to read data in order to repair
- * read errors on other disks.
- *
- * Current value is derived from RAID1C4 with 4 copies.
- */
-#define BTRFS_MAX_MIRRORS (4 + 1)
-
/* Represent one sector and its needed info to verify the content. */
struct scrub_sector_verification {
bool is_metadata;
@@ -182,31 +165,12 @@ struct scrub_stripe {
struct work_struct work;
};
-struct scrub_bio {
- int index;
- struct scrub_ctx *sctx;
- struct btrfs_device *dev;
- struct bio *bio;
- blk_status_t status;
- u64 logical;
- u64 physical;
- int sector_count;
- int next_free;
- struct work_struct work;
-};
-
struct scrub_ctx {
- struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
struct scrub_stripe *raid56_data_stripes;
struct btrfs_fs_info *fs_info;
int first_free;
- int curr;
int cur_stripe;
- atomic_t bios_in_flight;
- atomic_t workers_pending;
- spinlock_t list_lock;
- wait_queue_head_t list_wait;
struct list_head csum_list;
atomic_t cancel_req;
int readonly;
@@ -305,22 +269,8 @@ static void wait_scrub_stripe_io(struct scrub_stripe *stripe)
wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0);
}
-static void scrub_bio_end_io_worker(struct work_struct *work);
static void scrub_put_ctx(struct scrub_ctx *sctx);
-static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
-{
- refcount_inc(&sctx->refs);
- atomic_inc(&sctx->bios_in_flight);
-}
-
-static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
-{
- atomic_dec(&sctx->bios_in_flight);
- wake_up(&sctx->list_wait);
- scrub_put_ctx(sctx);
-}
-
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
while (atomic_read(&fs_info->scrub_pause_req)) {
@@ -371,21 +321,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (!sctx)
return;
- /* this can happen when scrub is cancelled */
- if (sctx->curr != -1) {
- struct scrub_bio *sbio = sctx->bios[sctx->curr];
-
- bio_put(sbio->bio);
- }
-
- for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
- struct scrub_bio *sbio = sctx->bios[i];
-
- if (!sbio)
- break;
- kfree(sbio);
- }
-
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
release_scrub_stripe(&sctx->stripes[i]);
@@ -410,28 +345,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
goto nomem;
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
- sctx->sectors_per_bio = SCRUB_SECTORS_PER_BIO;
- sctx->curr = -1;
sctx->fs_info = fs_info;
INIT_LIST_HEAD(&sctx->csum_list);
- for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
- struct scrub_bio *sbio;
-
- sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
- if (!sbio)
- goto nomem;
- sctx->bios[i] = sbio;
-
- sbio->index = i;
- sbio->sctx = sctx;
- sbio->sector_count = 0;
- INIT_WORK(&sbio->work, scrub_bio_end_io_worker);
-
- if (i != SCRUB_BIOS_PER_SCTX - 1)
- sctx->bios[i]->next_free = i + 1;
- else
- sctx->bios[i]->next_free = -1;
- }
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
int ret;
@@ -441,13 +356,9 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
sctx->stripes[i].sctx = sctx;
}
sctx->first_free = 0;
- atomic_set(&sctx->bios_in_flight, 0);
- atomic_set(&sctx->workers_pending, 0);
atomic_set(&sctx->cancel_req, 0);
- spin_lock_init(&sctx->list_lock);
spin_lock_init(&sctx->stat_lock);
- init_waitqueue_head(&sctx->list_wait);
sctx->throttle_deadline = 0;
mutex_init(&sctx->wr_lock);
@@ -1286,6 +1197,10 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
}
}
+/*
+ * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
+ * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max.
+ */
static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *device,
unsigned int bio_size)
{
@@ -1339,112 +1254,6 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
}
/*
- * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
- * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max.
- */
-static void scrub_throttle(struct scrub_ctx *sctx)
-{
- struct scrub_bio *sbio = sctx->bios[sctx->curr];
-
- scrub_throttle_dev_io(sctx, sbio->dev, sbio->bio->bi_iter.bi_size);
-}
-
-static void scrub_submit(struct scrub_ctx *sctx)
-{
- struct scrub_bio *sbio;
-
- if (sctx->curr == -1)
- return;
-
- scrub_throttle(sctx);
-
- sbio = sctx->bios[sctx->curr];
- sctx->curr = -1;
- scrub_pending_bio_inc(sctx);
- btrfsic_check_bio(sbio->bio);
- submit_bio(sbio->bio);
-}
-
-static void scrub_bio_end_io_worker(struct work_struct *work)
-{
- struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
- struct scrub_ctx *sctx = sbio->sctx;
-
- ASSERT(sbio->sector_count <= SCRUB_SECTORS_PER_BIO);
-
- bio_put(sbio->bio);
- sbio->bio = NULL;
- spin_lock(&sctx->list_lock);
- sbio->next_free = sctx->first_free;
- sctx->first_free = sbio->index;
- spin_unlock(&sctx->list_lock);
-
- scrub_pending_bio_dec(sctx);
-}
-
-static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)
-{
- sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits;
- list_del(&sum->list);
- kfree(sum);
-}
-
-/*
- * Find the desired csum for range [logical, logical + sectorsize), and store
- * the csum into @csum.
- *
- * The search source is sctx->csum_list, which is a pre-populated list
- * storing bytenr ordered csum ranges. We're responsible to cleanup any range
- * that is before @logical.
- *
- * Return 0 if there is no csum for the range.
- * Return 1 if there is csum for the range and copied to @csum.
- */
-int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
-{
- bool found = false;
-
- while (!list_empty(&sctx->csum_list)) {
- struct btrfs_ordered_sum *sum = NULL;
- unsigned long index;
- unsigned long num_sectors;
-
- sum = list_first_entry(&sctx->csum_list,
- struct btrfs_ordered_sum, list);
- /* The current csum range is beyond our range, no csum found */
- if (sum->bytenr > logical)
- break;
-
- /*
- * The current sum is before our bytenr, since scrub is always
- * done in bytenr order, the csum will never be used anymore,
- * clean it up so that later calls won't bother with the range,
- * and continue search the next range.
- */
- if (sum->bytenr + sum->len <= logical) {
- drop_csum_range(sctx, sum);
- continue;
- }
-
- /* Now the csum range covers our bytenr, copy the csum */
- found = true;
- index = (logical - sum->bytenr) >> sctx->fs_info->sectorsize_bits;
- num_sectors = sum->len >> sctx->fs_info->sectorsize_bits;
-
- memcpy(csum, sum->sums + index * sctx->fs_info->csum_size,
- sctx->fs_info->csum_size);
-
- /* Cleanup the range if we're at the end of the csum range */
- if (index == num_sectors - 1)
- drop_csum_range(sctx, sum);
- break;
- }
- if (!found)
- return 0;
- return 1;
-}
-
-/*
* Given a physical address, this will calculate it's
* logical offset. if this is a parity stripe, it will return
* the most left data stripe's logical offset.
@@ -1624,8 +1433,6 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
if (!btrfs_is_zoned(fs_info))
return 0;
- wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
-
mutex_lock(&sctx->wr_lock);
if (sctx->write_pointer < physical_end) {
ret = btrfs_sync_zone_write_pointer(sctx->wr_tgtdev, logical,
@@ -2153,11 +1960,6 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
/* Paused? */
if (atomic_read(&fs_info->scrub_pause_req)) {
/* Push queued extents */
- scrub_submit(sctx);
- mutex_lock(&sctx->wr_lock);
- mutex_unlock(&sctx->wr_lock);
- wait_event(sctx->list_wait,
- atomic_read(&sctx->bios_in_flight) == 0);
scrub_blocked_if_needed(fs_info);
}
/* Block group removed? */
@@ -2285,8 +2087,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 stripe_logical;
int stop_loop = 0;
- wait_event(sctx->list_wait,
- atomic_read(&sctx->bios_in_flight) == 0);
scrub_blocked_if_needed(fs_info);
if (sctx->is_dev_replace &&
@@ -2402,8 +2202,6 @@ next:
break;
}
out:
- /* push queued extents */
- scrub_submit(sctx);
flush_scrub_stripes(sctx);
if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
@@ -2728,34 +2526,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
dev_extent_len);
-
- /*
- * flush, submit all pending read and write bios, afterwards
- * wait for them.
- * Note that in the dev replace case, a read request causes
- * write requests that are submitted in the read completion
- * worker. Therefore in the current situation, it is required
- * that all write requests are flushed, so that all read and
- * write requests are really completed when bios_in_flight
- * changes to 0.
- */
- scrub_submit(sctx);
-
- wait_event(sctx->list_wait,
- atomic_read(&sctx->bios_in_flight) == 0);
-
- scrub_pause_on(fs_info);
-
- /*
- * must be called before we decrease @scrub_paused.
- * make sure we don't block transaction commit while
- * we are waiting pending workers finished.
- */
- wait_event(sctx->list_wait,
- atomic_read(&sctx->workers_pending) == 0);
-
- scrub_pause_off(fs_info);
-
if (sctx->is_dev_replace &&
!btrfs_finish_block_group_to_copy(dev_replace->srcdev,
cache, found_key.offset))
@@ -3086,12 +2856,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
ret = scrub_enumerate_chunks(sctx, dev, start, end);
memalloc_nofs_restore(nofs_flag);
- wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
atomic_dec(&fs_info->scrubs_running);
wake_up(&fs_info->scrub_pause_wait);
- wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
-
if (progress)
memcpy(progress, &sctx->stat, sizeof(*progress));