From 1d1f25bfda432a6b61bd0205d426226bbbd73504 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 31 Jan 2023 15:07:19 +0800 Subject: md: don't update recovery_cp when curr_resync is ACTIVE Don't update recovery_cp when curr_resync is MD_RESYNC_ACTIVE, otherwise md may skip the resync of the first 3 sectors if the resync procedure is interrupted before the first calling of ->sync_request() as shown below: md_do_sync thread control thread // setup resync mddev->recovery_cp = 0 j = 0 mddev->curr_resync = MD_RESYNC_ACTIVE // e.g., set array as idle set_bit(MD_RECOVERY_INTR, &&mddev_recovery) // resync loop // check INTR before calling sync_request !test_bit(MD_RECOVERY_INTR, &mddev->recovery // resync interrupted // update recovery_cp from 0 to 3 // the resync of three 3 sectors will be skipped mddev->recovery_cp = 3 Fixes: eac58d08d493 ("md: Use enum for overloaded magic numbers used by mddev->curr_resync") Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Hou Tao Reviewed-by: Logan Gunthorpe Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 02b0240e7c71..272cc5d14906 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9030,7 +9030,7 @@ void md_do_sync(struct md_thread *thread) mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync >= MD_RESYNC_ACTIVE) { + mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { -- cgit From d19329133d25ad3dc32f8a62635692cb2f189014 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Tue, 31 Jan 2023 13:17:09 +0800 Subject: md: Factor out is_md_suspended helper This helper function will be used in next patch. It's easy for understanding. Signed-off-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 272cc5d14906..5ec9fdd5e668 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -380,6 +380,13 @@ EXPORT_SYMBOL_GPL(md_new_event); static LIST_HEAD(all_mddevs); static DEFINE_SPINLOCK(all_mddevs_lock); +static bool is_md_suspended(struct mddev *mddev) +{ + if (mddev->suspended) + return true; + else + return false; +} /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is * being suspended pending a reconfiguration. @@ -389,7 +396,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); */ static bool is_suspended(struct mddev *mddev, struct bio *bio) { - if (mddev->suspended) + if (is_md_suspended(mddev)) return true; if (bio_data_dir(bio) != WRITE) return false; @@ -434,7 +441,7 @@ check_suspended: goto check_suspended; } - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) + if (atomic_dec_and_test(&mddev->active_io) && is_md_suspended(mddev)) wake_up(&mddev->sb_wait); } EXPORT_SYMBOL(md_handle_request); @@ -6219,7 +6226,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { md_bitmap_wait_behind_writes(mddev); - if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) { + if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } @@ -8531,7 +8538,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) return true; wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) || - mddev->suspended); + is_md_suspended(mddev)); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { percpu_ref_put(&mddev->writes_pending); return false; @@ -9259,7 +9266,7 @@ void md_check_recovery(struct mddev *mddev) wake_up(&mddev->sb_wait); } - if (mddev->suspended) + if (is_md_suspended(mddev)) return; if (mddev->bitmap) -- cgit From 72adae23a72cb12e2ef0dcd7c0aa042867f27998 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Tue, 31 Jan 2023 13:17:10 +0800 Subject: md: Change active_io to percpu Now the type of active_io is atomic. It's used to count how many ios are in the submitting process and it's added and decreased very time. But it only needs to check if it's zero when suspending the raid. So we can switch atomic to percpu to improve the performance. After switching active_io to percpu type, we use the state of active_io to judge if the raid device is suspended. And we don't need to wake up ->sb_wait in md_handle_request anymore. It's done in the callback function which is registered when initing active_io. The argument mddev->suspended is only used to count how many users are trying to set raid to suspend state. Signed-off-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 43 ++++++++++++++++++++++++------------------- drivers/md/md.h | 2 +- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5ec9fdd5e668..da6370835c47 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -382,10 +382,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); static bool is_md_suspended(struct mddev *mddev) { - if (mddev->suspended) - return true; - else - return false; + return percpu_ref_is_dying(&mddev->active_io); } /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is @@ -412,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio) void md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: - rcu_read_lock(); if (is_suspended(mddev, bio)) { DEFINE_WAIT(__wait); /* Bail out if REQ_NOWAIT is set for the bio */ if (bio->bi_opf & REQ_NOWAIT) { - rcu_read_unlock(); bio_wouldblock_error(bio); return; } @@ -426,23 +421,19 @@ check_suspended: TASK_UNINTERRUPTIBLE); if (!is_suspended(mddev, bio)) break; - rcu_read_unlock(); schedule(); - rcu_read_lock(); } finish_wait(&mddev->sb_wait, &__wait); } - atomic_inc(&mddev->active_io); - rcu_read_unlock(); + if (!percpu_ref_tryget_live(&mddev->active_io)) + goto check_suspended; if (!mddev->pers->make_request(mddev, bio)) { - atomic_dec(&mddev->active_io); - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); goto check_suspended; } - if (atomic_dec_and_test(&mddev->active_io) && is_md_suspended(mddev)) - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); } EXPORT_SYMBOL(md_handle_request); @@ -490,11 +481,10 @@ void mddev_suspend(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->suspended++) return; - synchronize_rcu(); wake_up(&mddev->sb_wait); set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); - smp_mb__after_atomic(); - wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); + percpu_ref_kill(&mddev->active_io); + wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io)); mddev->pers->quiesce(mddev, 1); clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); @@ -512,6 +502,7 @@ void mddev_resume(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; + percpu_ref_resurrect(&mddev->active_io); wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 0); @@ -690,7 +681,6 @@ void mddev_init(struct mddev *mddev) timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0); atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); - atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); @@ -5767,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t) } static int start_dirty_degraded; +static void active_io_release(struct percpu_ref *ref) +{ + struct mddev *mddev = container_of(ref, struct mddev, active_io); + + wake_up(&mddev->sb_wait); +} int md_run(struct mddev *mddev) { @@ -5847,10 +5843,15 @@ int md_run(struct mddev *mddev) nowait = nowait && bdev_nowait(rdev->bdev); } + err = percpu_ref_init(&mddev->active_io, active_io_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); + if (err) + return err; + if (!bioset_initialized(&mddev->bio_set)) { err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) - return err; + goto exit_active_io; } if (!bioset_initialized(&mddev->sync_set)) { err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); @@ -6038,6 +6039,8 @@ abort: bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); +exit_active_io: + percpu_ref_exit(&mddev->active_io); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -6262,6 +6265,7 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); } @@ -7835,6 +7839,7 @@ static void md_free_disk(struct gendisk *disk) struct mddev *mddev = disk->private_data; percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); diff --git a/drivers/md/md.h b/drivers/md/md.h index 554a9026669a..6335cb86e52e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -315,7 +315,7 @@ struct mddev { unsigned long sb_flags; int suspended; - atomic_t active_io; + struct percpu_ref active_io; int ro; int sysfs_active; /* set when sysfs deletes * are happening, so run/ -- cgit From 07dbb13542cc022677b64acc6e0bd0d8a2cbf4dc Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Sat, 21 Jan 2023 09:48:10 +0800 Subject: md: Free writes_pending in md_stop dm raid calls md_stop to stop the raid device. It needs to free the writes_pending here. Signed-off-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index da6370835c47..0cf340243ddb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6265,6 +6265,7 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->writes_pending); percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); -- cgit From ed821cf84e7b969fb5b63598c89d3428a30d8d31 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Feb 2023 15:59:20 +0800 Subject: md: use MD_RESYNC_* whenever possible Just replace magic numbers by MD_RESYNC_* enumerations. Signed-off-by: Hou Tao Reviewed-by: Logan Gunthorpe Signed-off-by: Song Liu --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0cf340243ddb..1961105712b7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6166,7 +6166,7 @@ static void md_clean(struct mddev *mddev) mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; mddev->new_chunk_sectors = 0; - mddev->curr_resync = 0; + mddev->curr_resync = MD_RESYNC_NONE; atomic64_set(&mddev->resync_mismatches, 0); mddev->suspend_lo = mddev->suspend_hi = 0; mddev->sync_speed_min = mddev->sync_speed_max = 0; @@ -8896,7 +8896,7 @@ void md_do_sync(struct md_thread *thread) atomic_set(&mddev->recovery_active, 0); last_check = 0; - if (j>2) { + if (j >= MD_RESYNC_ACTIVE) { pr_debug("md: resuming %s of %s from checkpoint.\n", desc, mdname(mddev)); mddev->curr_resync = j; @@ -8968,7 +8968,7 @@ void md_do_sync(struct md_thread *thread) if (j > max_sectors) /* when skipping, extra large numbers can be returned. */ j = max_sectors; - if (j > 2) + if (j >= MD_RESYNC_ACTIVE) mddev->curr_resync = j; mddev->curr_mark_cnt = io_sectors; if (last_check == 0) -- cgit