diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 143 |
1 files changed, 93 insertions, 50 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78637665166e..7930e1c78c45 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -31,6 +31,7 @@ #include "space-info.h" #include "block-group.h" #include "discard.h" +#include "zoned.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device) rcu_string_free(device->name); extent_io_tree_release(&device->alloc_state); bio_put(device->flush_bio); + btrfs_destroy_dev_zone_info(device); kfree(device); } @@ -667,6 +669,10 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); device->mode = flags; + ret = btrfs_get_dev_zone_info(device); + if (ret != 0) + goto error_free_page; + fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -822,7 +828,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, } else { mutex_lock(&fs_devices->device_list_mutex); device = btrfs_find_device(fs_devices, devid, - disk_super->dev_item.uuid, NULL, false); + disk_super->dev_item.uuid, NULL); /* * If this disk has been pulled into an fs devices created by @@ -1044,7 +1050,7 @@ error: } static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, - int step, struct btrfs_device **latest_dev) + struct btrfs_device **latest_dev) { struct btrfs_device *device, *next; @@ -1089,16 +1095,16 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, * After we have read the system tree and know devids belonging to this * filesystem, remove the device which does not belong there. */ -void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step) +void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *latest_dev = NULL; struct btrfs_fs_devices *seed_dev; mutex_lock(&uuid_mutex); - __btrfs_free_extra_devids(fs_devices, step, &latest_dev); + __btrfs_free_extra_devids(fs_devices, &latest_dev); list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list) - __btrfs_free_extra_devids(seed_dev, step, &latest_dev); + __btrfs_free_extra_devids(seed_dev, &latest_dev); fs_devices->latest_bdev = latest_dev->bdev; @@ -1137,6 +1143,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) device->bdev = NULL; } clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + btrfs_destroy_dev_zone_info(device); device->fs_info = NULL; atomic_set(&device->dev_stats_ccnt, 0); @@ -1217,6 +1224,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, fs_devices->latest_bdev = latest_dev->bdev; fs_devices->total_rw_bytes = 0; fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR; + fs_devices->read_policy = BTRFS_READ_POLICY_PID; return 0; } @@ -1268,7 +1276,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super) } static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev, - u64 bytenr) + u64 bytenr, u64 bytenr_orig) { struct btrfs_super_block *disk_super; struct page *page; @@ -1299,7 +1307,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev /* align our pointer to the offset of the super block */ disk_super = p + offset_in_page(bytenr); - if (btrfs_super_bytenr(disk_super) != bytenr || + if (btrfs_super_bytenr(disk_super) != bytenr_orig || btrfs_super_magic(disk_super) != BTRFS_MAGIC) { btrfs_release_disk_super(p); return ERR_PTR(-EINVAL); @@ -1334,7 +1342,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct block_device *bdev; - u64 bytenr; + u64 bytenr, bytenr_orig; + int ret; lockdep_assert_held(&uuid_mutex); @@ -1344,14 +1353,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, * So, we need to add a special mount option to scan for * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ - bytenr = btrfs_sb_offset(0); flags |= FMODE_EXCL; bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) return ERR_CAST(bdev); - disk_super = btrfs_read_disk_super(bdev, bytenr); + bytenr_orig = btrfs_sb_offset(0); + ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); + if (ret) + return ERR_PTR(ret); + + disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); if (IS_ERR(disk_super)) { device = ERR_CAST(disk_super); goto error_bdev_put; @@ -2015,6 +2028,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, if (IS_ERR(disk_super)) continue; + if (bdev_is_zoned(bdev)) { + btrfs_reset_sb_log_zones(bdev, copy_num); + continue; + } + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); page = virt_to_page(disk_super); @@ -2293,10 +2311,10 @@ static struct btrfs_device *btrfs_find_device_by_path( dev_uuid = disk_super->dev_item.uuid; if (btrfs_fs_incompat(fs_info, METADATA_UUID)) device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->metadata_uuid, true); + disk_super->metadata_uuid); else device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->fsid, true); + disk_super->fsid); btrfs_release_disk_super(disk_super); if (!device) @@ -2316,7 +2334,7 @@ struct btrfs_device *btrfs_find_device_by_devspec( if (devid) { device = btrfs_find_device(fs_info->fs_devices, devid, NULL, - NULL, true); + NULL); if (!device) return ERR_PTR(-ENOENT); return device; @@ -2465,7 +2483,7 @@ next_slot: read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_FSID_SIZE); device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid, true); + fs_uuid); BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { @@ -2507,6 +2525,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (IS_ERR(bdev)) return PTR_ERR(bdev); + if (!btrfs_check_device_zone_type(fs_info, bdev)) { + ret = -EINVAL; + goto error; + } + if (fs_devices->seeding) { seeding_dev = 1; down_write(&sb->s_umount); @@ -2540,10 +2563,17 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } rcu_assign_pointer(device->name, name); + device->fs_info = fs_info; + device->bdev = bdev; + + ret = btrfs_get_dev_zone_info(device); + if (ret) + goto error_free_device; + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - goto error_free_device; + goto error_free_zone; } q = bdev_get_queue(bdev); @@ -2556,8 +2586,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path fs_info->sectorsize); device->disk_total_bytes = device->total_bytes; device->commit_total_bytes = device->total_bytes; - device->fs_info = fs_info; - device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->mode = FMODE_EXCL; @@ -2704,6 +2732,8 @@ error_trans: sb->s_flags |= SB_RDONLY; if (trans) btrfs_end_transaction(trans); +error_free_zone: + btrfs_destroy_dev_zone_info(device); error_free_device: btrfs_free_device(device); error: @@ -5479,7 +5509,18 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, else num_stripes = map->num_stripes; - preferred_mirror = first + current->pid % num_stripes; + switch (fs_info->fs_devices->read_policy) { + default: + /* Shouldn't happen, just warn and use pid instead of failing */ + btrfs_warn_rl(fs_info, + "unknown read_policy type %u, reset to pid", + fs_info->fs_devices->read_policy); + fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID; + fallthrough; + case BTRFS_READ_POLICY_PID: + preferred_mirror = first + (current->pid % num_stripes); + break; + } if (dev_replace_is_ongoing && fs_info->dev_replace.cont_reading_from_srcdev_mode == @@ -6335,7 +6376,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, bio->bi_iter.bi_sector = physical >> 9; btrfs_debug_in_rcu(fs_info, "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", - bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector, + bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid, bio->bi_iter.bi_size); bio_set_dev(bio, dev->bdev); @@ -6367,7 +6408,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, { struct btrfs_device *dev; struct bio *first_bio = bio; - u64 logical = (u64)bio->bi_iter.bi_sector << 9; + u64 logical = bio->bi_iter.bi_sector << 9; u64 length = 0; u64 map_length; int ret; @@ -6447,8 +6488,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, * If @seed is true, traverse through the seed devices. */ struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, - u64 devid, u8 *uuid, u8 *fsid, - bool seed) + u64 devid, u8 *uuid, u8 *fsid) { struct btrfs_device *device; struct btrfs_fs_devices *seed_devs; @@ -6655,7 +6695,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, - devid, uuid, NULL, true); + devid, uuid, NULL); if (!map->stripes[i].dev && !btrfs_test_opt(fs_info, DEGRADED)) { free_extent_map(em); @@ -6794,7 +6834,7 @@ static int read_one_dev(struct extent_buffer *leaf, } device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid, true); + fs_uuid); if (!device) { if (!btrfs_test_opt(fs_info, DEGRADED)) { btrfs_report_missing_device(fs_info, devid, @@ -6857,6 +6897,16 @@ static int read_one_dev(struct extent_buffer *leaf, } fill_device_from_item(leaf, dev_item, device); + if (device->bdev) { + u64 max_total_bytes = i_size_read(device->bdev->bd_inode); + + if (device->total_bytes > max_total_bytes) { + btrfs_err(fs_info, + "device total_bytes should be at most %llu but found %llu", + max_total_bytes, device->total_bytes); + return -EINVAL; + } + } set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { @@ -6891,11 +6941,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will * overallocate but we can keep it as-is, only the first page is used. */ - sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET); + sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET, + root->root_key.objectid, 0); if (IS_ERR(sb)) return PTR_ERR(sb); set_extent_buffer_uptodate(sb); - btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* * The sb extent buffer is artificial and just used to read the system array. * set_extent_buffer_uptodate() call does not properly mark all it's @@ -7059,12 +7109,8 @@ static void readahead_tree_node_children(struct extent_buffer *node) int i; const int nr_items = btrfs_header_nritems(node); - for (i = 0; i < nr_items; i++) { - u64 start; - - start = btrfs_node_blockptr(node, i); - readahead_tree_block(node->fs_info, start); - } + for (i = 0; i < nr_items; i++) + btrfs_readahead_node_child(node, i); } int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) @@ -7451,8 +7497,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, int i; mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL, - true); + dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL); mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { @@ -7583,28 +7628,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* Make sure no dev extent is beyond device bondary */ - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true); + dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); if (!dev) { btrfs_err(fs_info, "failed to find devid %llu", devid); ret = -EUCLEAN; goto out; } - /* It's possible this device is a dummy for seed device */ - if (dev->disk_total_bytes == 0) { - struct btrfs_fs_devices *devs; - - devs = list_first_entry(&fs_info->fs_devices->seed_list, - struct btrfs_fs_devices, seed_list); - dev = btrfs_find_device(devs, devid, NULL, NULL, false); - if (!dev) { - btrfs_err(fs_info, "failed to find seed devid %llu", - devid); - ret = -EUCLEAN; - goto out; - } - } - if (physical_offset + physical_len > dev->disk_total_bytes) { btrfs_err(fs_info, "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", @@ -7659,6 +7689,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) u64 prev_dev_ext_end = 0; int ret = 0; + /* + * We don't have a dev_root because we mounted with ignorebadroots and + * failed to load the root, so we want to skip the verification in this + * case for sure. + * + * However if the dev root is fine, but the tree itself is corrupted + * we'd still fail to mount. This verification is only to make sure + * writes can happen safely, so instead just bypass this check + * completely in the case of IGNOREBADROOTS. + */ + if (btrfs_test_opt(fs_info, IGNOREBADROOTS)) + return 0; + key.objectid = 1; key.type = BTRFS_DEV_EXTENT_KEY; key.offset = 0; |