diff options
Diffstat (limited to 'drivers/md/dm-raid.c')
| -rw-r--r-- | drivers/md/dm-raid.c | 164 | 
1 file changed, 88 insertions, 76 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index b0aa595e4375..c412eaa975fc 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -209,6 +209,7 @@ struct raid_dev {  #define RT_FLAG_RS_SUSPENDED		5  #define RT_FLAG_RS_IN_SYNC		6  #define RT_FLAG_RS_RESYNCING		7 +#define RT_FLAG_RS_GROW			8  /* Array elements of 64 bit needed for rebuild/failed disk bits */  #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -241,6 +242,9 @@ struct raid_set {  	struct raid_type *raid_type;  	struct dm_target_callbacks callbacks; +	sector_t array_sectors; +	sector_t dev_sectors; +  	/* Optional raid4/5/6 journal device */  	struct journal_dev {  		struct dm_dev *dev; @@ -616,7 +620,6 @@ static int raid10_format_to_md_layout(struct raid_set *rs,  	} else if (algorithm == ALGORITHM_RAID10_FAR) {  		f = copies; -		r = !RAID10_OFFSET;  		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))  			r |= RAID10_USE_FAR_SETS; @@ -1615,13 +1618,12 @@ static int _check_data_dev_sectors(struct raid_set *rs)  }  /* Calculate the sectors per device and per array used for @rs */ -static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev) +static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)  {  	int delta_disks;  	unsigned int data_stripes; +	sector_t array_sectors = sectors, dev_sectors = sectors;  	struct mddev *mddev = &rs->md; -	struct md_rdev *rdev; -	sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;  	if (use_mddev) {  		delta_disks = mddev->delta_disks; @@ -1656,12 +1658,9 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)  		/* Striped layouts */  		array_sectors = (data_stripes + delta_disks) * dev_sectors; -	rdev_for_each(rdev, mddev) -		if (!test_bit(Journal, &rdev->flags)) -			rdev->sectors = dev_sectors; -  	mddev->array_sectors = array_sectors;  	mddev->dev_sectors = dev_sectors; +	
rs_set_rdev_sectors(rs);  	return _check_data_dev_sectors(rs);  bad: @@ -1670,7 +1669,7 @@ bad:  }  /* Setup recovery on @rs */ -static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) +static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)  {  	/* raid0 does not recover */  	if (rs_is_raid0(rs)) @@ -1691,22 +1690,6 @@ static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)  				     ? MaxSector : dev_sectors;  } -/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */ -static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) -{ -	if (!dev_sectors) -		/* New raid set or 'sync' flag provided */ -		__rs_setup_recovery(rs, 0); -	else if (dev_sectors == MaxSector) -		/* Prevent recovery */ -		__rs_setup_recovery(rs, MaxSector); -	else if (__rdev_sectors(rs) < dev_sectors) -		/* Grown raid set */ -		__rs_setup_recovery(rs, __rdev_sectors(rs)); -	else -		__rs_setup_recovery(rs, MaxSector); -} -  static void do_table_event(struct work_struct *ws)  {  	struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); @@ -2474,7 +2457,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)  		return -EINVAL;  	} -	/* Enable bitmap creation for RAID levels != 0 */ +	/* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */  	mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 
0 : to_sector(4096);  	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; @@ -2911,7 +2894,7 @@ static int rs_setup_reshape(struct raid_set *rs)  	/* Remove disk(s) */  	} else if (rs->delta_disks < 0) { -		r = rs_set_dev_and_array_sectors(rs, true); +		r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);  		mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */  	/* Change layout and/or chunk size */ @@ -3008,7 +2991,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  	bool resize = false;  	struct raid_type *rt;  	unsigned int num_raid_params, num_raid_devs; -	sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors; +	sector_t sb_array_sectors, rdev_sectors, reshape_sectors;  	struct raid_set *rs = NULL;  	const char *arg;  	struct rs_layout rs_layout; @@ -3067,11 +3050,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  	 *  	 * Any existing superblock will overwrite the array and device sizes  	 */ -	r = rs_set_dev_and_array_sectors(rs, false); +	r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);  	if (r)  		goto bad; -	calculated_dev_sectors = rs->md.dev_sectors; +	/* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */ +	rs->array_sectors = rs->md.array_sectors; +	rs->dev_sectors = rs->md.dev_sectors;  	/*  	 * Backup any new raid set level, layout, ... 
@@ -3084,6 +3069,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  	if (r)  		goto bad; +	/* All in-core metadata now as of current superblocks after calling analyse_superblocks() */ +	sb_array_sectors = rs->md.array_sectors;  	rdev_sectors = __rdev_sectors(rs);  	if (!rdev_sectors) {  		ti->error = "Invalid rdev size"; @@ -3093,8 +3080,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  	reshape_sectors = _get_reshape_sectors(rs); -	if (calculated_dev_sectors != rdev_sectors) -		resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors); +	if (rs->dev_sectors != rdev_sectors) { +		resize = (rs->dev_sectors != rdev_sectors - reshape_sectors); +		if (rs->dev_sectors > rdev_sectors - reshape_sectors) +			set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); +	}  	INIT_WORK(&rs->md.event_work, do_table_event);  	ti->private = rs; @@ -3121,13 +3111,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);  		rs_set_new(rs);  	} else if (rs_is_recovering(rs)) { -		/* Rebuild particular devices */ -		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { -			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); -			rs_setup_recovery(rs, MaxSector); -		}  		/* A recovering raid set may be resized */ -		; /* skip setup rs */ +		goto size_check;  	} else if (rs_is_reshaping(rs)) {  		/* Have to reject size change request during reshape */  		if (resize) { @@ -3171,6 +3156,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  		rs_setup_recovery(rs, MaxSector);  		rs_set_new(rs);  	} else if (rs_reshape_requested(rs)) { +		/* Only request grow on raid set size extensions, not on reshapes. */ +		clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); +  		/*  		 * No need to check for 'ongoing' takeover here, because takeover  		 * is an instant operation as oposed to an ongoing reshape. 
@@ -3201,13 +3189,31 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)  		}  		rs_set_cur(rs);  	} else { +size_check:  		/* May not set recovery when a device rebuild is requested */  		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { -			rs_setup_recovery(rs, MaxSector); +			clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);  			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); -		} else -			rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ? -					      0 : (resize ? calculated_dev_sectors : MaxSector)); +			rs_setup_recovery(rs, MaxSector); +		} else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) { +			/* +			 * Set raid set to current size, i.e. size as of +			 * superblocks to grow to larger size in preresume. +			 */ +			r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false); +			if (r) +				goto bad; + +			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); +		} else { +			/* This is no size change or it is shrinking, update size and record in superblocks */ +			r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); +			if (r) +				goto bad; + +			if (sb_array_sectors > rs->array_sectors) +				set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); +		}  		rs_set_cur(rs);  	} @@ -3406,10 +3412,9 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)  /* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */  static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, -				sector_t resync_max_sectors) +				enum sync_state state, sector_t resync_max_sectors)  {  	sector_t r; -	enum sync_state state;  	struct mddev *mddev = &rs->md;  	clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); @@ -3420,8 +3425,6 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,  		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);  	} else { -		state = 
decipher_sync_action(mddev, recovery); -  		if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))  			r = mddev->recovery_cp;  		else @@ -3439,18 +3442,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,  			/*  			 * In case we are recovering, the array is not in sync  			 * and health chars should show the recovering legs. +			 * +			 * Already retrieved recovery offset from curr_resync_completed above.  			 */  			; -		else if (state == st_resync) -			/* -			 * If "resync" is occurring, the raid set -			 * is or may be out of sync hence the health -			 * characters shall be 'a'. -			 */ -			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); -		else if (state == st_reshape) + +		else if (state == st_resync || state == st_reshape)  			/* -			 * If "reshape" is occurring, the raid set +			 * If "resync/reshape" is occurring, the raid set  			 * is or may be out of sync hence the health  			 * characters shall be 'a'.  			 */ @@ -3464,22 +3463,22 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,  			 */  			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); -		else { -			struct md_rdev *rdev; - +		else if (test_bit(MD_RECOVERY_NEEDED, &recovery))  			/*  			 * We are idle and recovery is needed, prevent 'A' chars race  			 * caused by components still set to in-sync by constructor.  			 */ -			if (test_bit(MD_RECOVERY_NEEDED, &recovery)) -				set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); +			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); +		else {  			/* -			 * The raid set may be doing an initial sync, or it may -			 * be rebuilding individual components.	 If all the -			 * devices are In_sync, then it is the raid set that is -			 * being initialized. +			 * We are idle and the raid set may be doing an initial +			 * sync, or it may be rebuilding individual components. +			 * If all the devices are In_sync, then it is the raid set +			 * that is being initialized.  			 
*/ +			struct md_rdev *rdev; +  			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);  			rdev_for_each(rdev, mddev)  				if (!test_bit(Journal, &rdev->flags) && @@ -3512,7 +3511,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,  	unsigned int rebuild_disks;  	unsigned int write_mostly_params = 0;  	sector_t progress, resync_max_sectors, resync_mismatches; -	const char *sync_action; +	enum sync_state state;  	struct raid_type *rt;  	switch (type) { @@ -3526,14 +3525,14 @@ static void raid_status(struct dm_target *ti, status_type_t type,  		/* Access most recent mddev properties for status output */  		smp_rmb(); -		recovery = rs->md.recovery;  		/* Get sensible max sectors even if raid set not yet started */  		resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?  				      mddev->resync_max_sectors : mddev->dev_sectors; -		progress = rs_get_progress(rs, recovery, resync_max_sectors); +		recovery = rs->md.recovery; +		state = decipher_sync_action(mddev, recovery); +		progress = rs_get_progress(rs, recovery, state, resync_max_sectors);  		resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?  				    atomic64_read(&mddev->resync_mismatches) : 0; -		sync_action = sync_str(decipher_sync_action(&rs->md, recovery));  		/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */  		for (i = 0; i < rs->raid_disks; i++) @@ -3561,7 +3560,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,  		 *   See Documentation/admin-guide/device-mapper/dm-raid.rst for  		 *   information on each of these states.  		 
*/ -		DMEMIT(" %s", sync_action); +		DMEMIT(" %s", sync_str(state));  		/*  		 * v1.5.0+: @@ -3955,11 +3954,22 @@ static int raid_preresume(struct dm_target *ti)  	if (r)  		return r; -	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ -	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && -	    mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { -		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, -				     to_bytes(rs->requested_bitmap_chunk_sectors), 0); +	/* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */ +	if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) { +		mddev->array_sectors = rs->array_sectors; +		mddev->dev_sectors = rs->dev_sectors; +		rs_set_rdev_sectors(rs); +		rs_set_capacity(rs); +	} + +	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ +        if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && +	    (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || +	     (rs->requested_bitmap_chunk_sectors && +	       mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { +		int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize; + +		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);  		if (r)  			DMERR("Failed to resize bitmap");  	} @@ -3968,8 +3978,10 @@ static int raid_preresume(struct dm_target *ti)  	/* Be prepared for mddev_resume() in raid_resume() */  	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);  	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { -		set_bit(MD_RECOVERY_SYNC, &mddev->recovery); +		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);  		mddev->resync_min = mddev->recovery_cp; +		if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) +			mddev->resync_max_sectors = mddev->dev_sectors;  	}  	/* Check for any reshape request unless new raid set */ @@ 
-4017,7 +4029,7 @@ static void raid_resume(struct dm_target *ti)  static struct target_type raid_target = {  	.name = "raid", -	.version = {1, 14, 0}, +	.version = {1, 15, 0},  	.module = THIS_MODULE,  	.ctr = raid_ctr,  	.dtr = raid_dtr,  |