diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 162 |
1 files changed, 71 insertions, 91 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2417f4fb8724..8f06f0e47ba8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5914,8 +5914,7 @@ static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes) } static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, - u16 total_stripes, - u16 real_stripes) + u16 total_stripes) { struct btrfs_io_context *bioc; @@ -5924,8 +5923,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ sizeof(struct btrfs_io_context) + /* Plus the variable array for the stripes */ sizeof(struct btrfs_io_stripe) * (total_stripes) + - /* Plus the variable array for the tgt dev */ - sizeof(u16) * (real_stripes) + /* * Plus the raid_map, which includes both the tgt dev * and the stripes. @@ -5939,8 +5936,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ refcount_set(&bioc->refs, 1); bioc->fs_info = fs_info; - bioc->tgtdev_map = (u16 *)(bioc->stripes + total_stripes); - bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); + bioc->raid_map = (u64 *)(bioc->stripes + total_stripes); + bioc->replace_stripe_src = -1; return bioc; } @@ -6204,93 +6201,74 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, int *num_stripes_ret, int *max_errors_ret) { u64 srcdev_devid = dev_replace->srcdev->devid; - int tgtdev_indexes = 0; + /* + * At this stage, num_stripes is still the real number of stripes, + * excluding the duplicated stripes. + */ int num_stripes = *num_stripes_ret; + int nr_extra_stripes = 0; int max_errors = *max_errors_ret; int i; - if (op == BTRFS_MAP_WRITE) { - int index_where_to_add; + /* + * A block group which has "to_copy" set will eventually be copied by + * the dev-replace process. We can avoid cloning IO here. + */ + if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical)) + return; - /* - * A block group which have "to_copy" set will eventually - * copied by dev-replace process. We can avoid cloning IO here. - */ - if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical)) - return; + /* + * Duplicate the write operations while the dev-replace procedure is + * running. Since the copying of the old disk to the new disk takes + * place at run time while the filesystem is mounted writable, the + * regular write operations to the old disk have to be duplicated to go + * to the new disk as well. + * + * Note that device->missing is handled by the caller, and that the + * write to the old disk is already set up in the stripes array. + */ + for (i = 0; i < num_stripes; i++) { + struct btrfs_io_stripe *old = &bioc->stripes[i]; + struct btrfs_io_stripe *new = &bioc->stripes[num_stripes + nr_extra_stripes]; - /* - * duplicate the write operations while the dev replace - * procedure is running. Since the copying of the old disk to - * the new disk takes place at run time while the filesystem is - * mounted writable, the regular write operations to the old - * disk have to be duplicated to go to the new disk as well. - * - * Note that device->missing is handled by the caller, and that - * the write to the old disk is already set up in the stripes - * array. - */ - index_where_to_add = num_stripes; - for (i = 0; i < num_stripes; i++) { - if (bioc->stripes[i].dev->devid == srcdev_devid) { - /* write to new disk, too */ - struct btrfs_io_stripe *new = - bioc->stripes + index_where_to_add; - struct btrfs_io_stripe *old = - bioc->stripes + i; - - new->physical = old->physical; - new->dev = dev_replace->tgtdev; - bioc->tgtdev_map[i] = index_where_to_add; - index_where_to_add++; - max_errors++; - tgtdev_indexes++; - } - } - num_stripes = index_where_to_add; - } else if (op == BTRFS_MAP_GET_READ_MIRRORS) { - int index_srcdev = 0; - int found = 0; - u64 physical_of_found = 0; + if (old->dev->devid != srcdev_devid) + continue; - /* - * During the dev-replace procedure, the target drive can also - * be used to read data in case it is needed to repair a corrupt - * block elsewhere. This is possible if the requested area is - * left of the left cursor. In this area, the target drive is a - * full copy of the source drive. - */ - for (i = 0; i < num_stripes; i++) { - if (bioc->stripes[i].dev->devid == srcdev_devid) { - /* - * In case of DUP, in order to keep it simple, - * only add the mirror with the lowest physical - * address - */ - if (found && - physical_of_found <= bioc->stripes[i].physical) - continue; - index_srcdev = i; - found = 1; - physical_of_found = bioc->stripes[i].physical; - } - } - if (found) { - struct btrfs_io_stripe *tgtdev_stripe = - bioc->stripes + num_stripes; + new->physical = old->physical; + new->dev = dev_replace->tgtdev; + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) + bioc->replace_stripe_src = i; + nr_extra_stripes++; + } - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->dev = dev_replace->tgtdev; - bioc->tgtdev_map[index_srcdev] = num_stripes; + /* We can only have at most 2 extra nr_stripes (for DUP). */ + ASSERT(nr_extra_stripes <= 2); + /* + * For GET_READ_MIRRORS, we can only return at most 1 extra stripe for + * replace. + * If we have 2 extra stripes, only choose the one with smaller physical. + */ + if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) { + struct btrfs_io_stripe *first = &bioc->stripes[num_stripes]; + struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1]; - tgtdev_indexes++; - num_stripes++; + /* Only DUP can have two extra stripes. */ + ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP); + + /* + * Swap the last stripe stripes and reduce @nr_extra_stripes. + * The extra stripe would still be there, but won't be accessed. + */ + if (first->physical > second->physical) { + swap(second->physical, first->physical); + swap(second->dev, first->dev); + nr_extra_stripes--; } } - *num_stripes_ret = num_stripes; - *max_errors_ret = max_errors; - bioc->num_tgtdevs = tgtdev_indexes; + *num_stripes_ret = num_stripes + nr_extra_stripes; + *max_errors_ret = max_errors + nr_extra_stripes; + bioc->replace_nr_stripes = nr_extra_stripes; } static bool need_full_stripe(enum btrfs_map_op op) @@ -6377,7 +6355,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, int dev_replace_is_ongoing = 0; int patch_the_first_stripe_for_dev_replace = 0; u16 num_alloc_stripes; - u16 tgtdev_indexes = 0; u64 physical_to_patch_in_first_stripe = 0; u64 raid56_full_stripe_start = (u64)-1; u64 max_len; @@ -6523,13 +6500,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } num_alloc_stripes = num_stripes; - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) { - if (op == BTRFS_MAP_WRITE) - num_alloc_stripes <<= 1; - if (op == BTRFS_MAP_GET_READ_MIRRORS) - num_alloc_stripes++; - tgtdev_indexes = num_stripes; - } + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && + op != BTRFS_MAP_READ) + /* + * For replace case, we need to add extra stripes for extra + * duplicated stripes. + * + * For both WRITE and GET_READ_MIRRORS, we may have at most + * 2 more stripes (DUP types, otherwise 1). + */ + num_alloc_stripes += 2; /* * If this I/O maps to a single device, try to return the device and @@ -6554,11 +6534,12 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, goto out; } - bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes); + bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes); if (!bioc) { ret = -ENOMEM; goto out; } + bioc->map_type = map->type; for (i = 0; i < num_stripes; i++) { set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset, @@ -6599,7 +6580,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } *bioc_ret = bioc; - bioc->map_type = map->type; bioc->num_stripes = num_stripes; bioc->max_errors = max_errors; bioc->mirror_num = mirror_num; |