Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--  drivers/md/md.c | 92
1 file changed, 65 insertions, 27 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 866825f10b4c..2c3ab6f5e6be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -394,8 +394,9 @@ static void submit_flushes(struct work_struct *ws)
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
 			bi->bi_bdev = rdev->bdev;
+			bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
 			atomic_inc(&mddev->flush_pending);
-			submit_bio(WRITE_FLUSH, bi);
+			submit_bio(bi);
 			rcu_read_lock();
 			rdev_dec_pending(rdev, mddev);
 		}
@@ -413,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws)
 		/* an empty barrier - all done */
 		bio_endio(bio);
 	else {
-		bio->bi_rw &= ~REQ_FLUSH;
+		bio->bi_rw &= ~REQ_PREFLUSH;
 		mddev->pers->make_request(mddev, bio);
 	}
 
@@ -742,9 +743,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
 	bio->bi_end_io = super_written;
+	bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
 
 	atomic_inc(&mddev->pending_writes);
-	submit_bio(WRITE_FLUSH_FUA, bio);
+	submit_bio(bio);
 }
 
 void md_super_wait(struct mddev *mddev)
@@ -754,13 +756,14 @@ void md_super_wait(struct mddev *mddev)
 }
 
 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-		 struct page *page, int rw, bool metadata_op)
+		 struct page *page, int op, int op_flags, bool metadata_op)
 {
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
 	int ret;
 
 	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
 		rdev->meta_bdev : rdev->bdev;
+	bio_set_op_attrs(bio, op, op_flags);
 	if (metadata_op)
 		bio->bi_iter.bi_sector = sector + rdev->sb_start;
 	else if (rdev->mddev->reshape_position != MaxSector &&
@@ -770,7 +773,8 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 	else
 		bio->bi_iter.bi_sector = sector + rdev->data_offset;
 	bio_add_page(bio, page, size, 0);
-	submit_bio_wait(rw, bio);
+
+	submit_bio_wait(bio);
 
 	ret = !bio->bi_error;
 	bio_put(bio);
@@ -785,7 +789,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 	if (rdev->sb_loaded)
 		return 0;
 
-	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
+	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
 		goto fail;
 	rdev->sb_loaded = 1;
 	return 0;
@@ -1471,7 +1475,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 			return -EINVAL;
 		bb_sector = (long long)offset;
 		if (!sync_page_io(rdev, bb_sector, sectors << 9,
-				  rdev->bb_page, READ, true))
+				  rdev->bb_page, REQ_OP_READ, 0, true))
 			return -EIO;
 		bbp = (u64 *)page_address(rdev->bb_page);
 		rdev->badblocks.shift = sb->bblog_shift;
@@ -2478,8 +2482,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
 		if (add_journal)
 			mddev_resume(mddev);
 		if (err) {
-			unbind_rdev_from_array(rdev);
-			export_rdev(rdev);
+			md_kick_rdev_from_array(rdev);
 			return err;
 		}
 	}
@@ -2596,6 +2599,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 		else
 			err = -EBUSY;
 	} else if (cmd_match(buf, "remove")) {
+		if (rdev->mddev->pers) {
+			clear_bit(Blocked, &rdev->flags);
+			remove_and_add_spares(rdev->mddev, rdev);
+		}
 		if (rdev->raid_disk >= 0)
 			err = -EBUSY;
 		else {
@@ -3172,8 +3179,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	rdev->data_offset = 0;
 	rdev->new_data_offset = 0;
 	rdev->sb_events = 0;
-	rdev->last_read_error.tv_sec  = 0;
-	rdev->last_read_error.tv_nsec = 0;
+	rdev->last_read_error = 0;
 	rdev->sb_loaded = 0;
 	rdev->bb_page = NULL;
 	atomic_set(&rdev->nr_pending, 0);
@@ -3579,6 +3585,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 			mddev->to_remove = &md_redundancy_group;
 	}
 
+	module_put(oldpers->owner);
+
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
@@ -3936,6 +3944,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 			} else
 				err = -EBUSY;
 		}
+		if (!err)
+			sysfs_notify_dirent_safe(mddev->sysfs_state);
 		spin_unlock(&mddev->lock);
 		return err ?: len;
 	}
@@ -4187,7 +4197,8 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
 		return err;
 	if (mddev->pers) {
 		err = update_size(mddev, sectors);
-		md_update_sb(mddev, 1);
+		if (err == 0)
+			md_update_sb(mddev, 1);
 	} else {
 		if (mddev->dev_sectors == 0 ||
 		    mddev->dev_sectors > sectors)
@@ -7809,6 +7820,7 @@ void md_do_sync(struct md_thread *thread)
 		if (ret)
 			goto skip;
 
+		set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
 		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
 			test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
 			test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
@@ -8147,18 +8159,11 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	if (mddev_is_clustered(mddev) &&
-	    ret == 0) {
-		/* set CHANGE_PENDING here since maybe another
-		 * update is needed, so other nodes are informed */
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-		md_cluster_ops->resync_finish(mddev);
-	} else
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	/* set CHANGE_PENDING here since maybe another update is needed,
+	 * so other nodes are informed. It should be harmless for normal
+	 * raid */
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8184,15 +8189,34 @@ static int remove_and_add_spares(struct mddev *mddev,
 	struct md_rdev *rdev;
 	int spares = 0;
 	int removed = 0;
+	bool remove_some = false;
 
-	rdev_for_each(rdev, mddev)
+	rdev_for_each(rdev, mddev) {
+		if ((this == NULL || rdev == this) &&
+		    rdev->raid_disk >= 0 &&
+		    !test_bit(Blocked, &rdev->flags) &&
+		    test_bit(Faulty, &rdev->flags) &&
+		    atomic_read(&rdev->nr_pending)==0) {
+			/* Faulty non-Blocked devices with nr_pending == 0
+			 * never get nr_pending incremented,
+			 * never get Faulty cleared, and never get Blocked set.
+			 * So we can synchronize_rcu now rather than once per device
+			 */
+			remove_some = true;
+			set_bit(RemoveSynchronized, &rdev->flags);
+		}
+	}
+
+	if (remove_some)
+		synchronize_rcu();
+	rdev_for_each(rdev, mddev) {
 		if ((this == NULL || rdev == this) &&
 		    rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
-		    (test_bit(Faulty, &rdev->flags) ||
+		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
 		     (!test_bit(In_sync, &rdev->flags) &&
 		      !test_bit(Journal, &rdev->flags))) &&
-		    atomic_read(&rdev->nr_pending)==0) {
+		    atomic_read(&rdev->nr_pending)==0)) {
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev) == 0) {
 				sysfs_unlink_rdev(mddev, rdev);
@@ -8200,6 +8224,10 @@ static int remove_and_add_spares(struct mddev *mddev,
 				removed++;
 			}
 		}
+		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
+			clear_bit(RemoveSynchronized, &rdev->flags);
+	}
+
 	if (removed && mddev->kobj.sd)
 		sysfs_notify(&mddev->kobj, NULL, "degraded");
 
@@ -8502,6 +8530,11 @@ void md_reap_sync_thread(struct mddev *mddev)
 			rdev->saved_raid_disk = -1;
 
 	md_update_sb(mddev, 1);
+	/* MD_CHANGE_PENDING should be cleared by md_update_sb, so we can
+	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
+	 * clustered raid */
+	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
+		md_cluster_ops->resync_finish(mddev);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -8799,6 +8832,7 @@ EXPORT_SYMBOL(md_reload_sb);
  * at boot time.
  */
 
+static DEFINE_MUTEX(detected_devices_mutex);
 static LIST_HEAD(all_detected_devices);
 struct detected_devices_node {
 	struct list_head list;
@@ -8812,7 +8846,9 @@ void md_autodetect_dev(dev_t dev)
 	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
 	if (node_detected_dev) {
 		node_detected_dev->dev = dev;
+		mutex_lock(&detected_devices_mutex);
 		list_add_tail(&node_detected_dev->list, &all_detected_devices);
+		mutex_unlock(&detected_devices_mutex);
 	} else {
 		printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
 			", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
@@ -8831,6 +8867,7 @@ static void autostart_arrays(int part)
 
 	printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
 
+	mutex_lock(&detected_devices_mutex);
 	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
 		i_scanned++;
 		node_detected_dev = list_entry(all_detected_devices.next,
@@ -8849,6 +8886,7 @@ static void autostart_arrays(int part)
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
+	mutex_unlock(&detected_devices_mutex);
 
 	printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
 						i_scanned, i_passed);
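Most of the mechanical churn in the hunks above comes from the 4.8 block-layer rework: the request operation (REQ_OP_*) and its flags are now attached to the bio itself via bio_set_op_attrs(), instead of being passed as an rw argument to submit_bio()/submit_bio_wait(). This is why sync_page_io() grows separate op and op_flags parameters and its callers pass REQ_OP_READ, 0 where they used to pass READ. A minimal before/after sketch follows; it is not part of this commit, the read_one_page_*() helpers are invented for illustration, and each variant only compiles against its own kernel version (pre-4.8 vs. 4.8):

/*
 * Illustrative sketch only, not from this commit: a synchronous
 * single-page read, before and after the 4.8 block API change.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>

/* Pre-4.8: the operation is chosen at submission time. */
static int read_one_page_old(struct block_device *bdev, sector_t sector,
			     struct page *page)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	int ret;

	if (!bio)
		return -ENOMEM;
	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	ret = submit_bio_wait(READ, bio);	/* op passed at submit */
	bio_put(bio);
	return ret;
}

/* 4.8: op and flags live on the bio, so anything inspecting the bio
 * before submission sees the complete request, and submit_bio() /
 * submit_bio_wait() lose their rw argument. */
static int read_one_page_new(struct block_device *bdev, sector_t sector,
			     struct page *page)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	int ret;

	if (!bio)
		return -ENOMEM;
	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = sector;
	bio_set_op_attrs(bio, REQ_OP_READ, 0);	/* op + flags on the bio */
	bio_add_page(bio, page, PAGE_SIZE, 0);
	ret = submit_bio_wait(bio);
	bio_put(bio);
	return ret;
}

The other structural change worth noting is in remove_and_add_spares(): a first pass tags every removable device (Faulty, not Blocked, no pending I/O) with RemoveSynchronized, so one synchronize_rcu() grace period covers all of them, rather than each removal paying for its own grace period.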