aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c162
1 files changed, 71 insertions, 91 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2417f4fb8724..8f06f0e47ba8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5914,8 +5914,7 @@ static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
}
static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
- u16 total_stripes,
- u16 real_stripes)
+ u16 total_stripes)
{
struct btrfs_io_context *bioc;
@@ -5924,8 +5923,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
sizeof(struct btrfs_io_context) +
/* Plus the variable array for the stripes */
sizeof(struct btrfs_io_stripe) * (total_stripes) +
- /* Plus the variable array for the tgt dev */
- sizeof(u16) * (real_stripes) +
/*
* Plus the raid_map, which includes both the tgt dev
* and the stripes.
@@ -5939,8 +5936,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
refcount_set(&bioc->refs, 1);
bioc->fs_info = fs_info;
- bioc->tgtdev_map = (u16 *)(bioc->stripes + total_stripes);
- bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
+ bioc->raid_map = (u64 *)(bioc->stripes + total_stripes);
+ bioc->replace_stripe_src = -1;
return bioc;
}
@@ -6204,93 +6201,74 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
int *num_stripes_ret, int *max_errors_ret)
{
u64 srcdev_devid = dev_replace->srcdev->devid;
- int tgtdev_indexes = 0;
+ /*
+ * At this stage, num_stripes is still the real number of stripes,
+ * excluding the duplicated stripes.
+ */
int num_stripes = *num_stripes_ret;
+ int nr_extra_stripes = 0;
int max_errors = *max_errors_ret;
int i;
- if (op == BTRFS_MAP_WRITE) {
- int index_where_to_add;
+ /*
+ * A block group which has "to_copy" set will eventually be copied by
+ * the dev-replace process. We can avoid cloning IO here.
+ */
+ if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
+ return;
- /*
- * A block group which have "to_copy" set will eventually
- * copied by dev-replace process. We can avoid cloning IO here.
- */
- if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
- return;
+ /*
+ * Duplicate the write operations while the dev-replace procedure is
+ * running. Since the copying of the old disk to the new disk takes
+ * place at run time while the filesystem is mounted writable, the
+ * regular write operations to the old disk have to be duplicated to go
+ * to the new disk as well.
+ *
+ * Note that device->missing is handled by the caller, and that the
+ * write to the old disk is already set up in the stripes array.
+ */
+ for (i = 0; i < num_stripes; i++) {
+ struct btrfs_io_stripe *old = &bioc->stripes[i];
+ struct btrfs_io_stripe *new = &bioc->stripes[num_stripes + nr_extra_stripes];
- /*
- * duplicate the write operations while the dev replace
- * procedure is running. Since the copying of the old disk to
- * the new disk takes place at run time while the filesystem is
- * mounted writable, the regular write operations to the old
- * disk have to be duplicated to go to the new disk as well.
- *
- * Note that device->missing is handled by the caller, and that
- * the write to the old disk is already set up in the stripes
- * array.
- */
- index_where_to_add = num_stripes;
- for (i = 0; i < num_stripes; i++) {
- if (bioc->stripes[i].dev->devid == srcdev_devid) {
- /* write to new disk, too */
- struct btrfs_io_stripe *new =
- bioc->stripes + index_where_to_add;
- struct btrfs_io_stripe *old =
- bioc->stripes + i;
-
- new->physical = old->physical;
- new->dev = dev_replace->tgtdev;
- bioc->tgtdev_map[i] = index_where_to_add;
- index_where_to_add++;
- max_errors++;
- tgtdev_indexes++;
- }
- }
- num_stripes = index_where_to_add;
- } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
- int index_srcdev = 0;
- int found = 0;
- u64 physical_of_found = 0;
+ if (old->dev->devid != srcdev_devid)
+ continue;
- /*
- * During the dev-replace procedure, the target drive can also
- * be used to read data in case it is needed to repair a corrupt
- * block elsewhere. This is possible if the requested area is
- * left of the left cursor. In this area, the target drive is a
- * full copy of the source drive.
- */
- for (i = 0; i < num_stripes; i++) {
- if (bioc->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it simple,
- * only add the mirror with the lowest physical
- * address
- */
- if (found &&
- physical_of_found <= bioc->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found = bioc->stripes[i].physical;
- }
- }
- if (found) {
- struct btrfs_io_stripe *tgtdev_stripe =
- bioc->stripes + num_stripes;
+ new->physical = old->physical;
+ new->dev = dev_replace->tgtdev;
+ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+ bioc->replace_stripe_src = i;
+ nr_extra_stripes++;
+ }
- tgtdev_stripe->physical = physical_of_found;
- tgtdev_stripe->dev = dev_replace->tgtdev;
- bioc->tgtdev_map[index_srcdev] = num_stripes;
+ /* We can only have at most 2 extra nr_stripes (for DUP). */
+ ASSERT(nr_extra_stripes <= 2);
+ /*
+ * For GET_READ_MIRRORS, we can only return at most 1 extra stripe for
+ * replace.
+ * If we have 2 extra stripes, only choose the one with smaller physical.
+ */
+ if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
+ struct btrfs_io_stripe *first = &bioc->stripes[num_stripes];
+ struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
- tgtdev_indexes++;
- num_stripes++;
+ /* Only DUP can have two extra stripes. */
+ ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP);
+
+ /*
+ * Swap the last stripe stripes and reduce @nr_extra_stripes.
+ * The extra stripe would still be there, but won't be accessed.
+ */
+ if (first->physical > second->physical) {
+ swap(second->physical, first->physical);
+ swap(second->dev, first->dev);
+ nr_extra_stripes--;
}
}
- *num_stripes_ret = num_stripes;
- *max_errors_ret = max_errors;
- bioc->num_tgtdevs = tgtdev_indexes;
+ *num_stripes_ret = num_stripes + nr_extra_stripes;
+ *max_errors_ret = max_errors + nr_extra_stripes;
+ bioc->replace_nr_stripes = nr_extra_stripes;
}
static bool need_full_stripe(enum btrfs_map_op op)
@@ -6377,7 +6355,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int dev_replace_is_ongoing = 0;
int patch_the_first_stripe_for_dev_replace = 0;
u16 num_alloc_stripes;
- u16 tgtdev_indexes = 0;
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
u64 max_len;
@@ -6523,13 +6500,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
num_alloc_stripes = num_stripes;
- if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
- if (op == BTRFS_MAP_WRITE)
- num_alloc_stripes <<= 1;
- if (op == BTRFS_MAP_GET_READ_MIRRORS)
- num_alloc_stripes++;
- tgtdev_indexes = num_stripes;
- }
+ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+ op != BTRFS_MAP_READ)
+ /*
+ * For replace case, we need to add extra stripes for extra
+ * duplicated stripes.
+ *
+ * For both WRITE and GET_READ_MIRRORS, we may have at most
+ * 2 more stripes (DUP types, otherwise 1).
+ */
+ num_alloc_stripes += 2;
/*
* If this I/O maps to a single device, try to return the device and
@@ -6554,11 +6534,12 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
goto out;
}
- bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes);
+ bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes);
if (!bioc) {
ret = -ENOMEM;
goto out;
}
+ bioc->map_type = map->type;
for (i = 0; i < num_stripes; i++) {
set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
@@ -6599,7 +6580,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
*bioc_ret = bioc;
- bioc->map_type = map->type;
bioc->num_stripes = num_stripes;
bioc->max_errors = max_errors;
bioc->mirror_num = mirror_num;