Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--	drivers/md/md.c	390
1 file changed, 203 insertions(+), 187 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a467b492d4ad..927a43db5dfb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -93,6 +93,18 @@ static int remove_and_add_spares(struct mddev *mddev,
 				 struct md_rdev *this);
 static void mddev_detach(struct mddev *mddev);
 
+enum md_ro_state {
+	MD_RDWR,
+	MD_RDONLY,
+	MD_AUTO_READ,
+	MD_MAX_STATE
+};
+
+static bool md_is_rdwr(struct mddev *mddev)
+{
+	return (mddev->ro == MD_RDWR);
+}
+
 /*
  * Default number of read corrections we'll attempt on an rdev
  * before ejecting it from the array. We divide the read error
@@ -368,6 +380,10 @@ EXPORT_SYMBOL_GPL(md_new_event);
 
 static LIST_HEAD(all_mddevs);
 static DEFINE_SPINLOCK(all_mddevs_lock);
+static bool is_md_suspended(struct mddev *mddev)
+{
+	return percpu_ref_is_dying(&mddev->active_io);
+}
 /* Rather than calling directly into the personality make_request function,
  * IO requests come here first so that we can check if the device is
  * being suspended pending a reconfiguration.
@@ -377,7 +393,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  */
 static bool is_suspended(struct mddev *mddev, struct bio *bio)
 {
-	if (mddev->suspended)
+	if (is_md_suspended(mddev))
 		return true;
 	if (bio_data_dir(bio) != WRITE)
 		return false;
@@ -393,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
 void md_handle_request(struct mddev *mddev, struct bio *bio)
 {
 check_suspended:
-	rcu_read_lock();
 	if (is_suspended(mddev, bio)) {
 		DEFINE_WAIT(__wait);
 		/* Bail out if REQ_NOWAIT is set for the bio */
 		if (bio->bi_opf & REQ_NOWAIT) {
-			rcu_read_unlock();
 			bio_wouldblock_error(bio);
 			return;
 		}
@@ -407,23 +421,19 @@ check_suspended:
 					TASK_UNINTERRUPTIBLE);
 			if (!is_suspended(mddev, bio))
 				break;
-			rcu_read_unlock();
 			schedule();
-			rcu_read_lock();
 		}
 		finish_wait(&mddev->sb_wait, &__wait);
 	}
-	atomic_inc(&mddev->active_io);
-	rcu_read_unlock();
+	if (!percpu_ref_tryget_live(&mddev->active_io))
+		goto check_suspended;
 
 	if (!mddev->pers->make_request(mddev, bio)) {
-		atomic_dec(&mddev->active_io);
-		wake_up(&mddev->sb_wait);
+		percpu_ref_put(&mddev->active_io);
 		goto check_suspended;
 	}
 
-	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
-		wake_up(&mddev->sb_wait);
+	percpu_ref_put(&mddev->active_io);
 }
 EXPORT_SYMBOL(md_handle_request);
 
@@ -443,8 +453,10 @@ static void md_submit_bio(struct bio *bio)
 	}
 
 	bio = bio_split_to_limits(bio);
+	if (!bio)
+		return;
 
-	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
+	if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
 			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
@@ -469,11 +481,10 @@ void mddev_suspend(struct mddev *mddev)
 	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (mddev->suspended++)
 		return;
-	synchronize_rcu();
 	wake_up(&mddev->sb_wait);
 	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
-	smp_mb__after_atomic();
-	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+	percpu_ref_kill(&mddev->active_io);
+	wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
 	mddev->pers->quiesce(mddev, 1);
 	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
 	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
@@ -491,6 +502,7 @@ void mddev_resume(struct mddev *mddev)
 	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (--mddev->suspended)
 		return;
+	percpu_ref_resurrect(&mddev->active_io);
 	wake_up(&mddev->sb_wait);
 	mddev->pers->quiesce(mddev, 0);
 
@@ -509,13 +521,14 @@ static void md_end_flush(struct bio *bio)
 	struct md_rdev *rdev = bio->bi_private;
 	struct mddev *mddev = rdev->mddev;
 
+	bio_put(bio);
+
 	rdev_dec_pending(rdev, mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
 		/* The pre-request flush has finished */
 		queue_work(md_wq, &mddev->flush_work);
 	}
-	bio_put(bio);
 }
 
 static void md_submit_flush_data(struct work_struct *ws);
@@ -668,7 +681,6 @@ void mddev_init(struct mddev *mddev)
 	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
 	atomic_set(&mddev->active, 1);
 	atomic_set(&mddev->openers, 0);
-	atomic_set(&mddev->active_io, 0);
 	spin_lock_init(&mddev->lock);
 	atomic_set(&mddev->flush_pending, 0);
 	init_waitqueue_head(&mddev->sb_wait);
@@ -913,10 +925,12 @@ static void super_written(struct bio *bio)
 	} else
 		clear_bit(LastDev, &rdev->flags);
 
+	bio_put(bio);
+
+	rdev_dec_pending(rdev, mddev);
+
 	if (atomic_dec_and_test(&mddev->pending_writes))
 		wake_up(&mddev->sb_wait);
-	rdev_dec_pending(rdev, mddev);
-	bio_put(bio);
 }
 
 void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
@@ -2453,7 +2467,22 @@ static void rdev_delayed_delete(struct work_struct *ws)
 	kobject_put(&rdev->kobj);
 }
 
-static void unbind_rdev_from_array(struct md_rdev *rdev)
+void md_autodetect_dev(dev_t dev);
+
+static void export_rdev(struct md_rdev *rdev)
+{
+	pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
+	md_rdev_clear(rdev);
+#ifndef MODULE
+	if (test_bit(AutoDetected, &rdev->flags))
+		md_autodetect_dev(rdev->bdev->bd_dev);
+#endif
+	blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+	rdev->bdev = NULL;
+	kobject_put(&rdev->kobj);
+}
+
+static void md_kick_rdev_from_array(struct md_rdev *rdev)
 {
 	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
 	list_del_rcu(&rdev->same_set);
@@ -2476,56 +2505,8 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
 	INIT_WORK(&rdev->del_work, rdev_delayed_delete);
 	kobject_get(&rdev->kobj);
 	queue_work(md_rdev_misc_wq, &rdev->del_work);
-}
-
-/*
- * prevent the device from being mounted, repartitioned or
- * otherwise reused by a RAID array (or any other kernel
- * subsystem), by bd_claiming the device.
- */
-static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
-{
-	int err = 0;
-	struct block_device *bdev;
-
-	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
-				 shared ? (struct md_rdev *)lock_rdev : rdev);
-	if (IS_ERR(bdev)) {
-		pr_warn("md: could not open device unknown-block(%u,%u).\n",
-			MAJOR(dev), MINOR(dev));
-		return PTR_ERR(bdev);
-	}
-	rdev->bdev = bdev;
-	return err;
-}
-
-static void unlock_rdev(struct md_rdev *rdev)
-{
-	struct block_device *bdev = rdev->bdev;
-	rdev->bdev = NULL;
-	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void md_autodetect_dev(dev_t dev);
-
-static void export_rdev(struct md_rdev *rdev)
-{
-	pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
-	md_rdev_clear(rdev);
-#ifndef MODULE
-	if (test_bit(AutoDetected, &rdev->flags))
-		md_autodetect_dev(rdev->bdev->bd_dev);
-#endif
-	unlock_rdev(rdev);
-	kobject_put(&rdev->kobj);
-}
-
-void md_kick_rdev_from_array(struct md_rdev *rdev)
-{
-	unbind_rdev_from_array(rdev);
 	export_rdev(rdev);
 }
-EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
 
 static void export_array(struct mddev *mddev)
 {
@@ -2639,7 +2620,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
 	int any_badblocks_changed = 0;
 	int ret = -1;
 
-	if (mddev->ro) {
+	if (!md_is_rdwr(mddev)) {
 		if (force_change)
 			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 		return;
@@ -3660,9 +3641,10 @@ EXPORT_SYMBOL_GPL(md_rdev_init);
  */
 static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
 {
-	int err;
+	static struct md_rdev claim_rdev; /* just for claiming the bdev */
 	struct md_rdev *rdev;
 	sector_t size;
+	int err;
 
 	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
 	if (!rdev)
@@ -3670,14 +3652,20 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
 
 	err = md_rdev_init(rdev);
 	if (err)
-		goto abort_free;
+		goto out_free_rdev;
 	err = alloc_disk_sb(rdev);
 	if (err)
-		goto abort_free;
+		goto out_clear_rdev;
 
-	err = lock_rdev(rdev, newdev, super_format == -2);
-	if (err)
-		goto abort_free;
+	rdev->bdev = blkdev_get_by_dev(newdev,
+			FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+			super_format == -2 ? &claim_rdev : rdev);
+	if (IS_ERR(rdev->bdev)) {
+		pr_warn("md: could not open device unknown-block(%u,%u).\n",
+			MAJOR(newdev), MINOR(newdev));
+		err = PTR_ERR(rdev->bdev);
+		goto out_clear_rdev;
+	}
 
 	kobject_init(&rdev->kobj, &rdev_ktype);
 
@@ -3686,7 +3674,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
 		pr_warn("md: %pg has zero or unknown size, marking faulty!\n",
 			rdev->bdev);
 		err = -EINVAL;
-		goto abort_free;
+		goto out_blkdev_put;
 	}
 
 	if (super_format >= 0) {
@@ -3696,21 +3684,22 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
 			pr_warn("md: %pg does not have a valid v%d.%d superblock, not importing!\n",
 				rdev->bdev,
 				super_format, super_minor);
-			goto abort_free;
+			goto out_blkdev_put;
 		}
 		if (err < 0) {
 			pr_warn("md: could not read %pg's sb, not importing!\n",
 				rdev->bdev);
-			goto abort_free;
+			goto out_blkdev_put;
 		}
 	}
 
 	return rdev;
 
-abort_free:
-	if (rdev->bdev)
-		unlock_rdev(rdev);
+out_blkdev_put:
+	blkdev_put(rdev->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+out_clear_rdev:
 	md_rdev_clear(rdev);
+out_free_rdev:
 	kfree(rdev);
 	return ERR_PTR(err);
 }
@@ -3901,7 +3890,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		goto out_unlock;
 	}
 	rv = -EROFS;
-	if (mddev->ro)
+	if (!md_is_rdwr(mddev))
 		goto out_unlock;
 
 	/* request to change the personality.  Need to ensure:
@@ -4107,7 +4096,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
 	if (mddev->pers) {
 		if (mddev->pers->check_reshape == NULL)
 			err = -EBUSY;
-		else if (mddev->ro)
+		else if (!md_is_rdwr(mddev))
 			err = -EROFS;
 		else {
 			mddev->new_layout = n;
@@ -4216,7 +4205,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
 	if (mddev->pers) {
 		if (mddev->pers->check_reshape == NULL)
 			err = -EBUSY;
-		else if (mddev->ro)
+		else if (!md_is_rdwr(mddev))
 			err = -EROFS;
 		else {
 			mddev->new_chunk_sectors = n >> 9;
@@ -4339,13 +4328,13 @@ array_state_show(struct mddev *mddev, char *page)
 
 	if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
 		switch(mddev->ro) {
-		case 1:
+		case MD_RDONLY:
 			st = readonly;
 			break;
-		case 2:
+		case MD_AUTO_READ:
 			st = read_auto;
 			break;
-		case 0:
+		case MD_RDWR:
 			spin_lock(&mddev->lock);
 			if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
 				st = write_pending;
@@ -4381,7 +4370,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	int err = 0;
 	enum array_state st = match_word(buf, array_states);
 
-	if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
+	if (mddev->pers && (st == active || st == clean) &&
+	    mddev->ro != MD_RDONLY) {
 		/* don't take reconfig_mutex when toggling between
 		 * clean and active
 		 */
@@ -4425,23 +4415,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		if (mddev->pers)
 			err = md_set_readonly(mddev, NULL);
 		else {
-			mddev->ro = 1;
+			mddev->ro = MD_RDONLY;
 			set_disk_ro(mddev->gendisk, 1);
 			err = do_md_run(mddev);
 		}
 		break;
 	case read_auto:
 		if (mddev->pers) {
-			if (mddev->ro == 0)
+			if (md_is_rdwr(mddev))
 				err = md_set_readonly(mddev, NULL);
-			else if (mddev->ro == 1)
+			else if (mddev->ro == MD_RDONLY)
 				err = restart_array(mddev);
 			if (err == 0) {
-				mddev->ro = 2;
+				mddev->ro = MD_AUTO_READ;
 				set_disk_ro(mddev->gendisk, 0);
 			}
 		} else {
-			mddev->ro = 2;
+			mddev->ro = MD_AUTO_READ;
 			err = do_md_run(mddev);
 		}
 		break;
@@ -4466,7 +4456,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 			wake_up(&mddev->sb_wait);
 			err = 0;
 		} else {
-			mddev->ro = 0;
+			mddev->ro = MD_RDWR;
 			set_disk_ro(mddev->gendisk, 0);
 			err = do_md_run(mddev);
 		}
@@ -4765,7 +4755,7 @@ action_show(struct mddev *mddev, char *page)
 	if (test_bit(MD_RECOVERY_FROZEN, &recovery))
 		type = "frozen";
 	else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
-	    (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
+	    (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
 		if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
 			type = "reshape";
 		else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
@@ -4851,11 +4841,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	}
-	if (mddev->ro == 2) {
+	if (mddev->ro == MD_AUTO_READ) {
 		/* A write to sync_action is enough to justify
 		 * canceling read-auto mode
 		 */
-		mddev->ro = 0;
+		mddev->ro = MD_RDWR;
 		md_wakeup_thread(mddev->sync_thread);
 	}
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5083,8 +5073,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
 			goto out_unlock;
 
 		err = -EBUSY;
-		if (max < mddev->resync_max &&
-		    mddev->ro == 0 &&
+		if (max < mddev->resync_max && md_is_rdwr(mddev) &&
 		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 			goto out_unlock;
 
@@ -5768,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t)
 }
 
 static int start_dirty_degraded;
+static void active_io_release(struct percpu_ref *ref)
+{
+	struct mddev *mddev = container_of(ref, struct mddev, active_io);
+
+	wake_up(&mddev->sb_wait);
+}
 
 int md_run(struct mddev *mddev)
 {
@@ -5813,8 +5808,8 @@ int md_run(struct mddev *mddev)
 			continue;
 		sync_blockdev(rdev->bdev);
 		invalidate_bdev(rdev->bdev);
-		if (mddev->ro != 1 && rdev_read_only(rdev)) {
-			mddev->ro = 1;
+		if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
+			mddev->ro = MD_RDONLY;
 			if (mddev->gendisk)
 				set_disk_ro(mddev->gendisk, 1);
 		}
@@ -5848,10 +5843,15 @@ int md_run(struct mddev *mddev)
 		nowait = nowait && bdev_nowait(rdev->bdev);
 	}
 
+	err = percpu_ref_init(&mddev->active_io, active_io_release,
+				PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+	if (err)
+		return err;
+
 	if (!bioset_initialized(&mddev->bio_set)) {
 		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (err)
-			return err;
+			goto exit_active_io;
 	}
 	if (!bioset_initialized(&mddev->sync_set)) {
 		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
@@ -5917,8 +5917,8 @@ int md_run(struct mddev *mddev)
 
 	mddev->ok_start_degraded = start_dirty_degraded;
 
-	if (start_readonly && mddev->ro == 0)
-		mddev->ro = 2; /* read-only, but switch on first write */
+	if (start_readonly && md_is_rdwr(mddev))
+		mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
 
 	err = pers->run(mddev);
 	if (err)
@@ -5996,8 +5996,8 @@ int md_run(struct mddev *mddev)
 		mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
 		mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
 		mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
-	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
-		mddev->ro = 0;
+	} else if (mddev->ro == MD_AUTO_READ)
+		mddev->ro = MD_RDWR;
 
 	atomic_set(&mddev->max_corr_read_errors,
 		   MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
@@ -6015,7 +6015,7 @@ int md_run(struct mddev *mddev)
 		if (rdev->raid_disk >= 0)
 			sysfs_link_rdev(mddev, rdev); /* failure here is OK */
 
-	if (mddev->degraded && !mddev->ro)
+	if (mddev->degraded && md_is_rdwr(mddev))
 		/* This ensures that recovering status is reported immediately
 		 * via sysfs - until a lack of spares is confirmed.
 		 */
@@ -6039,6 +6039,8 @@ abort:
 	bioset_exit(&mddev->sync_set);
exit_bio_set:
 	bioset_exit(&mddev->bio_set);
+exit_active_io:
+	percpu_ref_exit(&mddev->active_io);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6105,7 +6107,7 @@ static int restart_array(struct mddev *mddev)
 		return -ENXIO;
 	if (!mddev->pers)
 		return -EINVAL;
-	if (!mddev->ro)
+	if (md_is_rdwr(mddev))
 		return -EBUSY;
 
 	rcu_read_lock();
@@ -6124,7 +6126,7 @@ static int restart_array(struct mddev *mddev)
 		return -EROFS;
 
 	mddev->safemode = 0;
-	mddev->ro = 0;
+	mddev->ro = MD_RDWR;
 	set_disk_ro(disk, 0);
 	pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
 	/* Kick recovery or resync if necessary */
@@ -6151,7 +6153,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->clevel[0] = 0;
 	mddev->flags = 0;
 	mddev->sb_flags = 0;
-	mddev->ro = 0;
+	mddev->ro = MD_RDWR;
 	mddev->metadata_type[0] = 0;
 	mddev->chunk_sectors = 0;
 	mddev->ctime = mddev->utime = 0;
@@ -6164,7 +6166,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->new_level = LEVEL_NONE;
 	mddev->new_layout = 0;
 	mddev->new_chunk_sectors = 0;
-	mddev->curr_resync = 0;
+	mddev->curr_resync = MD_RESYNC_NONE;
 	atomic64_set(&mddev->resync_mismatches, 0);
 	mddev->suspend_lo = mddev->suspend_hi = 0;
 	mddev->sync_speed_min = mddev->sync_speed_max = 0;
@@ -6203,7 +6205,7 @@ static void __md_stop_writes(struct mddev *mddev)
 	}
 	md_bitmap_flush(mddev);
 
-	if (mddev->ro == 0 &&
+	if (md_is_rdwr(mddev) &&
 	    ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
 	     mddev->sb_flags)) {
 		/* mark array as shutdown cleanly */
@@ -6227,7 +6229,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
 static void mddev_detach(struct mddev *mddev)
 {
 	md_bitmap_wait_behind_writes(mddev);
-	if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
+	if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
 		mddev->pers->quiesce(mddev, 1);
 		mddev->pers->quiesce(mddev, 0);
 	}
@@ -6263,6 +6265,8 @@ void md_stop(struct mddev *mddev)
 	 */
 	__md_stop_writes(mddev);
 	__md_stop(mddev);
+	percpu_ref_exit(&mddev->writes_pending);
+	percpu_ref_exit(&mddev->active_io);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
 }
@@ -6312,9 +6316,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 		__md_stop_writes(mddev);
 
 		err  = -ENXIO;
-		if (mddev->ro==1)
+		if (mddev->ro == MD_RDONLY)
 			goto out;
-		mddev->ro = 1;
+		mddev->ro = MD_RDONLY;
 		set_disk_ro(mddev->gendisk, 1);
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6371,7 +6375,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
 		return -EBUSY;
 	}
 	if (mddev->pers) {
-		if (mddev->ro)
+		if (!md_is_rdwr(mddev))
 			set_disk_ro(disk, 0);
 
 		__md_stop_writes(mddev);
@@ -6388,8 +6392,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
 		mutex_unlock(&mddev->open_mutex);
 		mddev->changed = 1;
 
-		if (mddev->ro)
-			mddev->ro = 0;
+		if (!md_is_rdwr(mddev))
+			mddev->ro = MD_RDWR;
 	} else
 		mutex_unlock(&mddev->open_mutex);
 	/*
@@ -7204,7 +7208,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    mddev->sync_thread)
 		return -EBUSY;
-	if (mddev->ro)
+	if (!md_is_rdwr(mddev))
 		return -EROFS;
 
 	rdev_for_each(rdev, mddev) {
@@ -7234,7 +7238,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
 	/* change the number of raid disks */
 	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
-	if (mddev->ro)
+	if (!md_is_rdwr(mddev))
 		return -EROFS;
 	if (raid_disks <= 0 ||
 	    (mddev->max_disks && raid_disks >= mddev->max_disks))
@@ -7464,6 +7468,40 @@ static inline bool md_ioctl_valid(unsigned int cmd)
 	}
 }
 
+static int __md_set_array_info(struct mddev *mddev, void __user *argp)
+{
+	mdu_array_info_t info;
+	int err;
+
+	if (!argp)
+		memset(&info, 0, sizeof(info));
+	else if (copy_from_user(&info, argp, sizeof(info)))
+		return -EFAULT;
+
+	if (mddev->pers) {
+		err = update_array_info(mddev, &info);
+		if (err)
+			pr_warn("md: couldn't update array info. %d\n", err);
+		return err;
+	}
+
+	if (!list_empty(&mddev->disks)) {
+		pr_warn("md: array %s already has disks!\n", mdname(mddev));
+		return -EBUSY;
+	}
+
+	if (mddev->raid_disks) {
+		pr_warn("md: array %s already initialised!\n", mdname(mddev));
+		return -EBUSY;
+	}
+
+	err = md_set_array_info(mddev, &info);
+	if (err)
+		pr_warn("md: couldn't set array info. %d\n", err);
+
+	return err;
+}
+
 static int md_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
@@ -7569,36 +7607,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 	}
 
 	if (cmd == SET_ARRAY_INFO) {
-		mdu_array_info_t info;
-		if (!arg)
-			memset(&info, 0, sizeof(info));
-		else if (copy_from_user(&info, argp, sizeof(info))) {
-			err = -EFAULT;
-			goto unlock;
-		}
-		if (mddev->pers) {
-			err = update_array_info(mddev, &info);
-			if (err) {
-				pr_warn("md: couldn't update array info. %d\n", err);
-				goto unlock;
-			}
-			goto unlock;
-		}
-		if (!list_empty(&mddev->disks)) {
-			pr_warn("md: array %s already has disks!\n", mdname(mddev));
-			err = -EBUSY;
-			goto unlock;
-		}
-		if (mddev->raid_disks) {
-			pr_warn("md: array %s already initialised!\n", mdname(mddev));
-			err = -EBUSY;
-			goto unlock;
-		}
-		err = md_set_array_info(mddev, &info);
-		if (err) {
-			pr_warn("md: couldn't set array info. %d\n", err);
-			goto unlock;
-		}
+		err = __md_set_array_info(mddev, argp);
 		goto unlock;
 	}
 
@@ -7658,26 +7667,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 	 * The remaining ioctls are changing the state of the
 	 * superblock, so we do not allow them on read-only arrays.
 	 */
-	if (mddev->ro && mddev->pers) {
-		if (mddev->ro == 2) {
-			mddev->ro = 0;
-			sysfs_notify_dirent_safe(mddev->sysfs_state);
-			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			/* mddev_unlock will wake thread */
-			/* If a device failed while we were read-only, we
-			 * need to make sure the metadata is updated now.
-			 */
-			if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
-				mddev_unlock(mddev);
-				wait_event(mddev->sb_wait,
-					   !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
-					   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
-				mddev_lock_nointr(mddev);
-			}
-		} else {
+	if (!md_is_rdwr(mddev) && mddev->pers) {
+		if (mddev->ro != MD_AUTO_READ) {
 			err = -EROFS;
 			goto unlock;
 		}
+		mddev->ro = MD_RDWR;
+		sysfs_notify_dirent_safe(mddev->sysfs_state);
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		/* mddev_unlock will wake thread */
+		/* If a device failed while we were read-only, we
+		 * need to make sure the metadata is updated now.
+		 */
+		if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
+			mddev_unlock(mddev);
+			wait_event(mddev->sb_wait,
+				   !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
+				   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+			mddev_lock_nointr(mddev);
+		}
 	}
 
 	switch (cmd) {
@@ -7763,11 +7771,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
 	 * Transitioning to read-auto need only happen for arrays that call
	 * md_write_start and which are not ready for writes yet.
 	 */
-	if (!ro && mddev->ro == 1 && mddev->pers) {
+	if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
 		err = restart_array(mddev);
 		if (err)
 			goto out_unlock;
-		mddev->ro = 2;
+		mddev->ro = MD_AUTO_READ;
 	}
 
out_unlock:
@@ -7832,6 +7840,7 @@ static void md_free_disk(struct gendisk *disk)
 	struct mddev *mddev = disk->private_data;
 
 	percpu_ref_exit(&mddev->writes_pending);
+	percpu_ref_exit(&mddev->active_io);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
 
@@ -8241,9 +8250,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%s : %sactive", mdname(mddev),
 						mddev->pers ? "" : "in");
 		if (mddev->pers) {
-			if (mddev->ro==1)
+			if (mddev->ro == MD_RDONLY)
 				seq_printf(seq, " (read-only)");
-			if (mddev->ro==2)
+			if (mddev->ro == MD_AUTO_READ)
 				seq_printf(seq, " (auto-read-only)");
 			seq_printf(seq, " %s", mddev->pers->name);
 		}
@@ -8502,10 +8511,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
 	if (bio_data_dir(bi) != WRITE)
 		return true;
 
-	BUG_ON(mddev->ro == 1);
-	if (mddev->ro == 2) {
+	BUG_ON(mddev->ro == MD_RDONLY);
+	if (mddev->ro == MD_AUTO_READ) {
 		/* need to switch to read/write */
-		mddev->ro = 0;
+		mddev->ro = MD_RDWR;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		md_wakeup_thread(mddev->sync_thread);
@@ -8535,7 +8544,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
 		return true;
 	wait_event(mddev->sb_wait,
 		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
-		   mddev->suspended);
+		   is_md_suspended(mddev));
 	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
 		percpu_ref_put(&mddev->writes_pending);
 		return false;
@@ -8556,7 +8565,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi)
 {
 	if (bio_data_dir(bi) != WRITE)
 		return;
-	WARN_ON_ONCE(mddev->in_sync || mddev->ro);
+	WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
 	percpu_ref_get(&mddev->writes_pending);
 }
 EXPORT_SYMBOL(md_write_inc);
@@ -8619,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio)
 {
 	struct md_io_acct *md_io_acct = bio->bi_private;
 	struct bio *orig_bio = md_io_acct->orig_bio;
+	struct mddev *mddev = md_io_acct->mddev;
 
 	orig_bio->bi_status = bio->bi_status;
 
 	bio_end_io_acct(orig_bio, md_io_acct->start_time);
 	bio_put(bio);
 	bio_endio(orig_bio);
+
+	percpu_ref_put(&mddev->active_io);
 }
 
 /*
@@ -8640,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
 	if (!blk_queue_io_stat(bdev->bd_disk->queue))
 		return;
 
+	percpu_ref_get(&mddev->active_io);
+
 	clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
 	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
 	md_io_acct->orig_bio = *bio;
 	md_io_acct->start_time = bio_start_io_acct(*bio);
+	md_io_acct->mddev = mddev;
 
 	clone->bi_end_io = md_end_io_acct;
 	clone->bi_private = md_io_acct;
@@ -8661,7 +8676,7 @@ void md_allow_write(struct mddev *mddev)
 {
 	if (!mddev->pers)
 		return;
-	if (mddev->ro)
+	if (!md_is_rdwr(mddev))
 		return;
 	if (!mddev->pers->sync_request)
 		return;
@@ -8709,7 +8724,7 @@ void md_do_sync(struct md_thread *thread)
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
 	    test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
 		return;
-	if (mddev->ro) {/* never try to sync a read-only array */
+	if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		return;
 	}
@@ -8887,7 +8902,7 @@ void md_do_sync(struct md_thread *thread)
 	atomic_set(&mddev->recovery_active, 0);
 	last_check = 0;
 
-	if (j>2) {
+	if (j >= MD_RESYNC_ACTIVE) {
 		pr_debug("md: resuming %s of %s from checkpoint.\n",
 			 desc, mdname(mddev));
 		mddev->curr_resync = j;
@@ -8959,7 +8974,7 @@ void md_do_sync(struct md_thread *thread)
 		if (j > max_sectors)
 			/* when skipping, extra large numbers can be returned. */
 			j = max_sectors;
-		if (j > 2)
+		if (j >= MD_RESYNC_ACTIVE)
 			mddev->curr_resync = j;
 		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
@@ -9034,7 +9049,7 @@ void md_do_sync(struct md_thread *thread)
 	mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
-	    mddev->curr_resync >= MD_RESYNC_ACTIVE) {
+	    mddev->curr_resync > MD_RESYNC_ACTIVE) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 				if (mddev->curr_resync >= mddev->recovery_cp) {
@@ -9178,9 +9193,9 @@ static int remove_and_add_spares(struct mddev *mddev,
 		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		if (!test_bit(Journal, &rdev->flags)) {
-			if (mddev->ro &&
-			    ! (rdev->saved_raid_disk >= 0 &&
-			       !test_bit(Bitmap_sync, &rdev->flags)))
+			if (!md_is_rdwr(mddev) &&
+			    !(rdev->saved_raid_disk >= 0 &&
+			      !test_bit(Bitmap_sync, &rdev->flags)))
 				continue;
 
 			rdev->recovery_offset = 0;
@@ -9263,7 +9278,7 @@ void md_check_recovery(struct mddev *mddev)
 		wake_up(&mddev->sb_wait);
 	}
 
-	if (mddev->suspended)
+	if (is_md_suspended(mddev))
 		return;
 
 	if (mddev->bitmap)
@@ -9278,7 +9293,8 @@ void md_check_recovery(struct mddev *mddev)
 		flush_signals(current);
 	}
 
-	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+	if (!md_is_rdwr(mddev) &&
+	    !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
 		return;
 	if ( ! (
 		(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9297,7 +9313,7 @@ void md_check_recovery(struct mddev *mddev)
 		if (!mddev->external && mddev->safemode == 1)
 			mddev->safemode = 0;
 
-		if (mddev->ro) {
+		if (!md_is_rdwr(mddev)) {
 			struct md_rdev *rdev;
 			if (!mddev->external && mddev->in_sync)
 				/* 'Blocked' flag not needed as failed devices
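
The bulk of this diff converts mddev->active_io from an atomic_t counter to a percpu_ref: md_handle_request() takes a reference with percpu_ref_tryget_live(), mddev_suspend() drains in-flight I/O with percpu_ref_kill() plus a wait, and mddev_resume() re-arms the ref with percpu_ref_resurrect(). For reference, a minimal self-contained sketch of that pattern follows; it is not part of the commit, and all demo_* names are invented for illustration — the real implementation is the mddev code above.

	/*
	 * Sketch only: the percpu_ref suspend/drain/resume pattern,
	 * reduced to a standalone shape. All demo_* identifiers are
	 * hypothetical.
	 */
	#include <linux/gfp.h>
	#include <linux/percpu-refcount.h>
	#include <linux/wait.h>

	struct demo_device {
		struct percpu_ref	active_io;
		wait_queue_head_t	wait;
	};

	/* called once the last in-flight reference drops after a kill */
	static void demo_active_io_release(struct percpu_ref *ref)
	{
		struct demo_device *dev =
			container_of(ref, struct demo_device, active_io);

		wake_up(&dev->wait);
	}

	static int demo_init(struct demo_device *dev)
	{
		init_waitqueue_head(&dev->wait);
		/* ALLOW_REINIT so percpu_ref_resurrect() can undo a kill */
		return percpu_ref_init(&dev->active_io, demo_active_io_release,
				       PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
	}

	static int demo_submit_io(struct demo_device *dev)
	{
		/* fails once the ref is killed, i.e. device is suspending */
		if (!percpu_ref_tryget_live(&dev->active_io))
			return -EBUSY;
		/* ... issue and complete the I/O ... */
		percpu_ref_put(&dev->active_io);
		return 0;
	}

	static void demo_suspend(struct demo_device *dev)
	{
		percpu_ref_kill(&dev->active_io);	/* new triers now fail */
		wait_event(dev->wait, percpu_ref_is_zero(&dev->active_io));
	}

	static void demo_resume(struct demo_device *dev)
	{
		percpu_ref_resurrect(&dev->active_io);	/* accept I/O again */
	}

	static void demo_destroy(struct demo_device *dev)
	{
		percpu_ref_exit(&dev->active_io);
	}

The trade-off this buys: percpu_ref_tryget_live() touches only a per-cpu counter in the common case, so the hot submission path no longer bounces a shared atomic between CPUs; the cost moves to the rare suspend path, where percpu_ref_kill() switches the ref to atomic mode before the drain. PERCPU_REF_ALLOW_REINIT is what makes the later percpu_ref_resurrect() legal.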