aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c390
1 files changed, 203 insertions, 187 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a467b492d4ad..927a43db5dfb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
+enum md_ro_state {
+ MD_RDWR,
+ MD_RDONLY,
+ MD_AUTO_READ,
+ MD_MAX_STATE
+};
+
+static bool md_is_rdwr(struct mddev *mddev)
+{
+ return (mddev->ro == MD_RDWR);
+}
+
/*
* Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error
@@ -368,6 +380,10 @@ EXPORT_SYMBOL_GPL(md_new_event);
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
+static bool is_md_suspended(struct mddev *mddev)
+{
+ return percpu_ref_is_dying(&mddev->active_io);
+}
/* Rather than calling directly into the personality make_request function,
* IO requests come here first so that we can check if the device is
* being suspended pending a reconfiguration.
@@ -377,7 +393,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
*/
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
- if (mddev->suspended)
+ if (is_md_suspended(mddev))
return true;
if (bio_data_dir(bio) != WRITE)
return false;
@@ -393,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
- rcu_read_lock();
if (is_suspended(mddev, bio)) {
DEFINE_WAIT(__wait);
/* Bail out if REQ_NOWAIT is set for the bio */
if (bio->bi_opf & REQ_NOWAIT) {
- rcu_read_unlock();
bio_wouldblock_error(bio);
return;
}
@@ -407,23 +421,19 @@ check_suspended:
TASK_UNINTERRUPTIBLE);
if (!is_suspended(mddev, bio))
break;
- rcu_read_unlock();
schedule();
- rcu_read_lock();
}
finish_wait(&mddev->sb_wait, &__wait);
}
- atomic_inc(&mddev->active_io);
- rcu_read_unlock();
+ if (!percpu_ref_tryget_live(&mddev->active_io))
+ goto check_suspended;
if (!mddev->pers->make_request(mddev, bio)) {
- atomic_dec(&mddev->active_io);
- wake_up(&mddev->sb_wait);
+ percpu_ref_put(&mddev->active_io);
goto check_suspended;
}
- if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
- wake_up(&mddev->sb_wait);
+ percpu_ref_put(&mddev->active_io);
}
EXPORT_SYMBOL(md_handle_request);
@@ -443,8 +453,10 @@ static void md_submit_bio(struct bio *bio)
}
bio = bio_split_to_limits(bio);
+ if (!bio)
+ return;
- if (mddev->ro == 1 && unlikely(rw == WRITE)) {
+ if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
@@ -469,11 +481,10 @@ void mddev_suspend(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
if (mddev->suspended++)
return;
- synchronize_rcu();
wake_up(&mddev->sb_wait);
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
- smp_mb__after_atomic();
- wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ percpu_ref_kill(&mddev->active_io);
+ wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
mddev->pers->quiesce(mddev, 1);
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
@@ -491,6 +502,7 @@ void mddev_resume(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
+ percpu_ref_resurrect(&mddev->active_io);
wake_up(&mddev->sb_wait);
mddev->pers->quiesce(mddev, 0);
@@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
+ bio_put(bio);
+
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
}
- bio_put(bio);
}
static void md_submit_flush_data(struct work_struct *ws);
@@ -668,7 +681,6 @@ void mddev_init(struct mddev *mddev)
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
atomic_set(&mddev->active, 1);
atomic_set(&mddev->openers, 0);
- atomic_set(&mddev->active_io, 0);
spin_lock_init(&mddev->lock);
atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
@@ -913,10 +925,12 @@ static void super_written(struct bio *bio)
} else
clear_bit(LastDev, &rdev->flags);
+ bio_put(bio);
+
+ rdev_dec_pending(rdev, mddev);
+
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
- rdev_dec_pending(rdev, mddev);
- bio_put(bio);
}
void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
@@ -2453,7 +2467,22 @@ static void rdev_delayed_delete(struct work_struct *ws)
kobject_put(&rdev->kobj);
}
-static void unbind_rdev_from_array(struct md_rdev *rdev)
+void md_autodetect_dev(dev_t dev);
+
+static void export_rdev(struct md_rdev *rdev)
+{
+ pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
+ md_rdev_clear(rdev);
+#ifndef MODULE
+ if (test_bit(AutoDetected, &rdev->flags))
+ md_autodetect_dev(rdev->bdev->bd_dev);
+#endif
+ blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ rdev->bdev = NULL;
+ kobject_put(&rdev->kobj);
+}
+
+static void md_kick_rdev_from_array(struct md_rdev *rdev)
{
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
@@ -2476,56 +2505,8 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
queue_work(md_rdev_misc_wq, &rdev->del_work);
-}
-
-/*
- * prevent the device from being mounted, repartitioned or
- * otherwise reused by a RAID array (or any other kernel
- * subsystem), by bd_claiming the device.
- */
-static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
-{
- int err = 0;
- struct block_device *bdev;
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- shared ? (struct md_rdev *)lock_rdev : rdev);
- if (IS_ERR(bdev)) {
- pr_warn("md: could not open device unknown-block(%u,%u).\n",
- MAJOR(dev), MINOR(dev));
- return PTR_ERR(bdev);
- }
- rdev->bdev = bdev;
- return err;
-}
-
-static void unlock_rdev(struct md_rdev *rdev)
-{
- struct block_device *bdev = rdev->bdev;
- rdev->bdev = NULL;
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void md_autodetect_dev(dev_t dev);
-
-static void export_rdev(struct md_rdev *rdev)
-{
- pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
- md_rdev_clear(rdev);
-#ifndef MODULE
- if (test_bit(AutoDetected, &rdev->flags))
- md_autodetect_dev(rdev->bdev->bd_dev);
-#endif
- unlock_rdev(rdev);
- kobject_put(&rdev->kobj);
-}
-
-void md_kick_rdev_from_array(struct md_rdev *rdev)
-{
- unbind_rdev_from_array(rdev);
export_rdev(rdev);
}
-EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
static void export_array(struct mddev *mddev)
{
@@ -2639,7 +2620,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
int any_badblocks_changed = 0;
int ret = -1;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
if (force_change)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
@@ -3660,9 +3641,10 @@ EXPORT_SYMBOL_GPL(md_rdev_init);
*/
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
- int err;
+ static struct md_rdev claim_rdev; /* just for claiming the bdev */
struct md_rdev *rdev;
sector_t size;
+ int err;
rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
if (!rdev)
@@ -3670,14 +3652,20 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
err = md_rdev_init(rdev);
if (err)
- goto abort_free;
+ goto out_free_rdev;
err = alloc_disk_sb(rdev);
if (err)
- goto abort_free;
+ goto out_clear_rdev;
- err = lock_rdev(rdev, newdev, super_format == -2);
- if (err)
- goto abort_free;
+ rdev->bdev = blkdev_get_by_dev(newdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ super_format == -2 ? &claim_rdev : rdev);
+ if (IS_ERR(rdev->bdev)) {
+ pr_warn("md: could not open device unknown-block(%u,%u).\n",
+ MAJOR(newdev), MINOR(newdev));
+ err = PTR_ERR(rdev->bdev);
+ goto out_clear_rdev;
+ }
kobject_init(&rdev->kobj, &rdev_ktype);
@@ -3686,7 +3674,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
pr_warn("md: %pg has zero or unknown size, marking faulty!\n",
rdev->bdev);
err = -EINVAL;
- goto abort_free;
+ goto out_blkdev_put;
}
if (super_format >= 0) {
@@ -3696,21 +3684,22 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
pr_warn("md: %pg does not have a valid v%d.%d superblock, not importing!\n",
rdev->bdev,
super_format, super_minor);
- goto abort_free;
+ goto out_blkdev_put;
}
if (err < 0) {
pr_warn("md: could not read %pg's sb, not importing!\n",
rdev->bdev);
- goto abort_free;
+ goto out_blkdev_put;
}
}
return rdev;
-abort_free:
- if (rdev->bdev)
- unlock_rdev(rdev);
+out_blkdev_put:
+ blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+out_clear_rdev:
md_rdev_clear(rdev);
+out_free_rdev:
kfree(rdev);
return ERR_PTR(err);
}
@@ -3901,7 +3890,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
}
rv = -EROFS;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
goto out_unlock;
/* request to change the personality. Need to ensure:
@@ -4107,7 +4096,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_layout = n;
@@ -4216,7 +4205,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_chunk_sectors = n >> 9;
@@ -4339,13 +4328,13 @@ array_state_show(struct mddev *mddev, char *page)
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
switch(mddev->ro) {
- case 1:
+ case MD_RDONLY:
st = readonly;
break;
- case 2:
+ case MD_AUTO_READ:
st = read_auto;
break;
- case 0:
+ case MD_RDWR:
spin_lock(&mddev->lock);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
st = write_pending;
@@ -4381,7 +4370,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
int err = 0;
enum array_state st = match_word(buf, array_states);
- if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
+ if (mddev->pers && (st == active || st == clean) &&
+ mddev->ro != MD_RDONLY) {
/* don't take reconfig_mutex when toggling between
* clean and active
*/
@@ -4425,23 +4415,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers)
err = md_set_readonly(mddev, NULL);
else {
- mddev->ro = 1;
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
if (mddev->pers) {
- if (mddev->ro == 0)
+ if (md_is_rdwr(mddev))
err = md_set_readonly(mddev, NULL);
- else if (mddev->ro == 1)
+ else if (mddev->ro == MD_RDONLY)
err = restart_array(mddev);
if (err == 0) {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
set_disk_ro(mddev->gendisk, 0);
}
} else {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
err = do_md_run(mddev);
}
break;
@@ -4466,7 +4456,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
wake_up(&mddev->sb_wait);
err = 0;
} else {
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
@@ -4765,7 +4755,7 @@ action_show(struct mddev *mddev, char *page)
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
type = "frozen";
else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
+ (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
type = "reshape";
else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
@@ -4851,11 +4841,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
}
- if (mddev->ro == 2) {
+ if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5083,8 +5073,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
err = -EBUSY;
- if (max < mddev->resync_max &&
- mddev->ro == 0 &&
+ if (max < mddev->resync_max && md_is_rdwr(mddev) &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
goto out_unlock;
@@ -5768,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t)
}
static int start_dirty_degraded;
+static void active_io_release(struct percpu_ref *ref)
+{
+ struct mddev *mddev = container_of(ref, struct mddev, active_io);
+
+ wake_up(&mddev->sb_wait);
+}
int md_run(struct mddev *mddev)
{
@@ -5813,8 +5808,8 @@ int md_run(struct mddev *mddev)
continue;
sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev);
- if (mddev->ro != 1 && rdev_read_only(rdev)) {
- mddev->ro = 1;
+ if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
+ mddev->ro = MD_RDONLY;
if (mddev->gendisk)
set_disk_ro(mddev->gendisk, 1);
}
@@ -5848,10 +5843,15 @@ int md_run(struct mddev *mddev)
nowait = nowait && bdev_nowait(rdev->bdev);
}
+ err = percpu_ref_init(&mddev->active_io, active_io_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+ if (err)
+ return err;
+
if (!bioset_initialized(&mddev->bio_set)) {
err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (err)
- return err;
+ goto exit_active_io;
}
if (!bioset_initialized(&mddev->sync_set)) {
err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
@@ -5917,8 +5917,8 @@ int md_run(struct mddev *mddev)
mddev->ok_start_degraded = start_dirty_degraded;
- if (start_readonly && mddev->ro == 0)
- mddev->ro = 2; /* read-only, but switch on first write */
+ if (start_readonly && md_is_rdwr(mddev))
+ mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
err = pers->run(mddev);
if (err)
@@ -5996,8 +5996,8 @@ int md_run(struct mddev *mddev)
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
- } else if (mddev->ro == 2) /* auto-readonly not meaningful */
- mddev->ro = 0;
+ } else if (mddev->ro == MD_AUTO_READ)
+ mddev->ro = MD_RDWR;
atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
@@ -6015,7 +6015,7 @@ int md_run(struct mddev *mddev)
if (rdev->raid_disk >= 0)
sysfs_link_rdev(mddev, rdev); /* failure here is OK */
- if (mddev->degraded && !mddev->ro)
+ if (mddev->degraded && md_is_rdwr(mddev))
/* This ensures that recovering status is reported immediately
* via sysfs - until a lack of spares is confirmed.
*/
@@ -6039,6 +6039,8 @@ abort:
bioset_exit(&mddev->sync_set);
exit_bio_set:
bioset_exit(&mddev->bio_set);
+exit_active_io:
+ percpu_ref_exit(&mddev->active_io);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -6105,7 +6107,7 @@ static int restart_array(struct mddev *mddev)
return -ENXIO;
if (!mddev->pers)
return -EINVAL;
- if (!mddev->ro)
+ if (md_is_rdwr(mddev))
return -EBUSY;
rcu_read_lock();
@@ -6124,7 +6126,7 @@ static int restart_array(struct mddev *mddev)
return -EROFS;
mddev->safemode = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(disk, 0);
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
@@ -6151,7 +6153,7 @@ static void md_clean(struct mddev *mddev)
mddev->clevel[0] = 0;
mddev->flags = 0;
mddev->sb_flags = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0;
@@ -6164,7 +6166,7 @@ static void md_clean(struct mddev *mddev)
mddev->new_level = LEVEL_NONE;
mddev->new_layout = 0;
mddev->new_chunk_sectors = 0;
- mddev->curr_resync = 0;
+ mddev->curr_resync = MD_RESYNC_NONE;
atomic64_set(&mddev->resync_mismatches, 0);
mddev->suspend_lo = mddev->suspend_hi = 0;
mddev->sync_speed_min = mddev->sync_speed_max = 0;
@@ -6203,7 +6205,7 @@ static void __md_stop_writes(struct mddev *mddev)
}
md_bitmap_flush(mddev);
- if (mddev->ro == 0 &&
+ if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
mddev->sb_flags)) {
/* mark array as shutdown cleanly */
@@ -6227,7 +6229,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
md_bitmap_wait_behind_writes(mddev);
- if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
+ if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
@@ -6263,6 +6265,8 @@ void md_stop(struct mddev *mddev)
*/
__md_stop_writes(mddev);
__md_stop(mddev);
+ percpu_ref_exit(&mddev->writes_pending);
+ percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
}
@@ -6312,9 +6316,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
__md_stop_writes(mddev);
err = -ENXIO;
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY)
goto out;
- mddev->ro = 1;
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6371,7 +6375,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
return -EBUSY;
}
if (mddev->pers) {
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
set_disk_ro(disk, 0);
__md_stop_writes(mddev);
@@ -6388,8 +6392,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- if (mddev->ro)
- mddev->ro = 0;
+ if (!md_is_rdwr(mddev))
+ mddev->ro = MD_RDWR;
} else
mutex_unlock(&mddev->open_mutex);
/*
@@ -7204,7 +7208,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->sync_thread)
return -EBUSY;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
rdev_for_each(rdev, mddev) {
@@ -7234,7 +7238,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */
if (mddev->pers->check_reshape == NULL)
return -EINVAL;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
@@ -7464,6 +7468,40 @@ static inline bool md_ioctl_valid(unsigned int cmd)
}
}
+static int __md_set_array_info(struct mddev *mddev, void __user *argp)
+{
+ mdu_array_info_t info;
+ int err;
+
+ if (!argp)
+ memset(&info, 0, sizeof(info));
+ else if (copy_from_user(&info, argp, sizeof(info)))
+ return -EFAULT;
+
+ if (mddev->pers) {
+ err = update_array_info(mddev, &info);
+ if (err)
+ pr_warn("md: couldn't update array info. %d\n", err);
+ return err;
+ }
+
+ if (!list_empty(&mddev->disks)) {
+ pr_warn("md: array %s already has disks!\n", mdname(mddev));
+ return -EBUSY;
+ }
+
+ if (mddev->raid_disks) {
+ pr_warn("md: array %s already initialised!\n", mdname(mddev));
+ return -EBUSY;
+ }
+
+ err = md_set_array_info(mddev, &info);
+ if (err)
+ pr_warn("md: couldn't set array info. %d\n", err);
+
+ return err;
+}
+
static int md_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -7569,36 +7607,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
if (cmd == SET_ARRAY_INFO) {
- mdu_array_info_t info;
- if (!arg)
- memset(&info, 0, sizeof(info));
- else if (copy_from_user(&info, argp, sizeof(info))) {
- err = -EFAULT;
- goto unlock;
- }
- if (mddev->pers) {
- err = update_array_info(mddev, &info);
- if (err) {
- pr_warn("md: couldn't update array info. %d\n", err);
- goto unlock;
- }
- goto unlock;
- }
- if (!list_empty(&mddev->disks)) {
- pr_warn("md: array %s already has disks!\n", mdname(mddev));
- err = -EBUSY;
- goto unlock;
- }
- if (mddev->raid_disks) {
- pr_warn("md: array %s already initialised!\n", mdname(mddev));
- err = -EBUSY;
- goto unlock;
- }
- err = md_set_array_info(mddev, &info);
- if (err) {
- pr_warn("md: couldn't set array info. %d\n", err);
- goto unlock;
- }
+ err = __md_set_array_info(mddev, argp);
goto unlock;
}
@@ -7658,26 +7667,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
* The remaining ioctls are changing the state of the
* superblock, so we do not allow them on read-only arrays.
*/
- if (mddev->ro && mddev->pers) {
- if (mddev->ro == 2) {
- mddev->ro = 0;
- sysfs_notify_dirent_safe(mddev->sysfs_state);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- /* mddev_unlock will wake thread */
- /* If a device failed while we were read-only, we
- * need to make sure the metadata is updated now.
- */
- if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
- mddev_unlock(mddev);
- wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
- mddev_lock_nointr(mddev);
- }
- } else {
+ if (!md_is_rdwr(mddev) && mddev->pers) {
+ if (mddev->ro != MD_AUTO_READ) {
err = -EROFS;
goto unlock;
}
+ mddev->ro = MD_RDWR;
+ sysfs_notify_dirent_safe(mddev->sysfs_state);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ /* mddev_unlock will wake thread */
+ /* If a device failed while we were read-only, we
+ * need to make sure the metadata is updated now.
+ */
+ if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
+ mddev_unlock(mddev);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+ mddev_lock_nointr(mddev);
+ }
}
switch (cmd) {
@@ -7763,11 +7771,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
* Transitioning to read-auto need only happen for arrays that call
* md_write_start and which are not ready for writes yet.
*/
- if (!ro && mddev->ro == 1 && mddev->pers) {
+ if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
err = restart_array(mddev);
if (err)
goto out_unlock;
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
}
out_unlock:
@@ -7832,6 +7840,7 @@ static void md_free_disk(struct gendisk *disk)
struct mddev *mddev = disk->private_data;
percpu_ref_exit(&mddev->writes_pending);
+ percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
@@ -8241,9 +8250,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%s : %sactive", mdname(mddev),
mddev->pers ? "" : "in");
if (mddev->pers) {
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY)
seq_printf(seq, " (read-only)");
- if (mddev->ro==2)
+ if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
}
@@ -8502,10 +8511,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (bio_data_dir(bi) != WRITE)
return true;
- BUG_ON(mddev->ro == 1);
- if (mddev->ro == 2) {
+ BUG_ON(mddev->ro == MD_RDONLY);
+ if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
@@ -8535,7 +8544,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
return true;
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
- mddev->suspended);
+ is_md_suspended(mddev));
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
percpu_ref_put(&mddev->writes_pending);
return false;
@@ -8556,7 +8565,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi)
{
if (bio_data_dir(bi) != WRITE)
return;
- WARN_ON_ONCE(mddev->in_sync || mddev->ro);
+ WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
percpu_ref_get(&mddev->writes_pending);
}
EXPORT_SYMBOL(md_write_inc);
@@ -8619,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
+ struct mddev *mddev = md_io_acct->mddev;
orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
bio_put(bio);
bio_endio(orig_bio);
+
+ percpu_ref_put(&mddev->active_io);
}
/*
@@ -8640,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
if (!blk_queue_io_stat(bdev->bd_disk->queue))
return;
+ percpu_ref_get(&mddev->active_io);
+
clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
+ md_io_acct->mddev = mddev;
clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
@@ -8661,7 +8676,7 @@ void md_allow_write(struct mddev *mddev)
{
if (!mddev->pers)
return;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return;
if (!mddev->pers->sync_request)
return;
@@ -8709,7 +8724,7 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return;
- if (mddev->ro) {/* never try to sync a read-only array */
+ if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return;
}
@@ -8887,7 +8902,7 @@ void md_do_sync(struct md_thread *thread)
atomic_set(&mddev->recovery_active, 0);
last_check = 0;
- if (j>2) {
+ if (j >= MD_RESYNC_ACTIVE) {
pr_debug("md: resuming %s of %s from checkpoint.\n",
desc, mdname(mddev));
mddev->curr_resync = j;
@@ -8959,7 +8974,7 @@ void md_do_sync(struct md_thread *thread)
if (j > max_sectors)
/* when skipping, extra large numbers can be returned. */
j = max_sectors;
- if (j > 2)
+ if (j >= MD_RESYNC_ACTIVE)
mddev->curr_resync = j;
mddev->curr_mark_cnt = io_sectors;
if (last_check == 0)
@@ -9034,7 +9049,7 @@ void md_do_sync(struct md_thread *thread)
mddev->pers->sync_request(mddev, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
- mddev->curr_resync >= MD_RESYNC_ACTIVE) {
+ mddev->curr_resync > MD_RESYNC_ACTIVE) {
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
if (mddev->curr_resync >= mddev->recovery_cp) {
@@ -9178,9 +9193,9 @@ static int remove_and_add_spares(struct mddev *mddev,
if (test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(Journal, &rdev->flags)) {
- if (mddev->ro &&
- ! (rdev->saved_raid_disk >= 0 &&
- !test_bit(Bitmap_sync, &rdev->flags)))
+ if (!md_is_rdwr(mddev) &&
+ !(rdev->saved_raid_disk >= 0 &&
+ !test_bit(Bitmap_sync, &rdev->flags)))
continue;
rdev->recovery_offset = 0;
@@ -9263,7 +9278,7 @@ void md_check_recovery(struct mddev *mddev)
wake_up(&mddev->sb_wait);
}
- if (mddev->suspended)
+ if (is_md_suspended(mddev))
return;
if (mddev->bitmap)
@@ -9278,7 +9293,8 @@ void md_check_recovery(struct mddev *mddev)
flush_signals(current);
}
- if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ if (!md_is_rdwr(mddev) &&
+ !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9297,7 +9313,7 @@ void md_check_recovery(struct mddev *mddev)
if (!mddev->external && mddev->safemode == 1)
mddev->safemode = 0;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices