diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 12:26:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 12:26:57 -0700 |
commit | 547635c6ac47c7556d6954935b189defe90422f7 (patch) | |
tree | 5545e515b7916df8a08aca89ec95401a54e00289 /fs/btrfs/volumes.c | |
parent | f678c890c684373a387b0d73cd4d51edbf329c27 (diff) | |
parent | c02d35d89b317994bd713ba82e160c5e7f22d9c8 (diff) |
Merge tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"No new features, the bulk of the changes are fixes, refactoring and
cleanups. The notable fix is the scrub performance restoration after
rewrite in 6.4, though still only partial.
Fixes:
- scrub performance drop due to rewrite in 6.4 partially restored:
- do IO grouping by blg_plug/blk_unplug again
- avoid unnecessary tree searches when processing stripes, in
extent and checksum trees
- the drop is noticeable on fast PCIe devices, -66% and restored
to -33% of the original
- backports to 6.4 planned
- handle more corner cases of transaction commit during orphan
cleanup or delayed ref processing
- use correct fsid/metadata_uuid when validating super block
- copy directory permissions and time when creating a stub subvolume
Core:
- debugging feature integrity checker deprecated, to be removed in
6.7
- in zoned mode, zones are activated just before the write, making
error handling easier, now the overcommit mechanism can be enabled
again which improves performance by avoiding more frequent flushing
- v0 extent handling completely removed, deprecated long time ago
- error handling improvements
- tests:
- extent buffer bitmap tests
- pinned extent splitting tests
- cleanups and refactoring:
- compression writeback
- extent buffer bitmap
- space flushing, ENOSPC handling"
* tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (110 commits)
btrfs: zoned: skip splitting and logical rewriting on pre-alloc write
btrfs: tests: test invalid splitting when skipping pinned drop extent_map
btrfs: tests: add a test for btrfs_add_extent_mapping
btrfs: tests: add extent_map tests for dropping with odd layouts
btrfs: scrub: move write back of repaired sectors to scrub_stripe_read_repair_worker()
btrfs: scrub: don't go ordered workqueue for dev-replace
btrfs: scrub: fix grouping of read IO
btrfs: scrub: avoid unnecessary csum tree search preparing stripes
btrfs: scrub: avoid unnecessary extent tree search preparing stripes
btrfs: copy dir permission and time when creating a stub subvolume
btrfs: remove pointless empty list check when reading delayed dir indexes
btrfs: drop redundant check to use fs_devices::metadata_uuid
btrfs: compare the correct fsid/metadata_uuid in btrfs_validate_super
btrfs: use the correct superblock to compare fsid in btrfs_validate_super
btrfs: simplify memcpy either of metadata_uuid or fsid
btrfs: add a helper to read the superblock metadata_uuid
btrfs: remove v0 extent handling
btrfs: output extra debug info if we failed to find an inline backref
btrfs: move the !zoned assert into run_delalloc_cow
btrfs: consolidate the error handling in run_delalloc_nocow
...
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 94 |
1 files changed, 63 insertions, 31 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f6718999d183..9621455edebc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -681,6 +681,14 @@ error_free_page: return -EINVAL; } +u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb) +{ + bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) & + BTRFS_FEATURE_INCOMPAT_METADATA_UUID); + + return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; +} + /* * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices * being created with a disk that has already completed its fsid change. Such @@ -833,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path, found_transid > fs_devices->latest_generation) { memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); - - if (has_metadata_uuid) - memcpy(fs_devices->metadata_uuid, - disk_super->metadata_uuid, - BTRFS_FSID_SIZE); - else - memcpy(fs_devices->metadata_uuid, - disk_super->fsid, BTRFS_FSID_SIZE); - + memcpy(fs_devices->metadata_uuid, + btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE); fs_devices->fsid_change = false; } } @@ -851,8 +852,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (fs_devices->opened) { btrfs_err(NULL, - "device %s belongs to fsid %pU, and the fs is already mounted", - path, fs_devices->fsid); +"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)", + path, fs_devices->fsid, current->comm, + task_pid_nr(current)); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } @@ -1424,9 +1426,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, lockdep_assert_held(&device->fs_info->chunk_mutex); - if (!find_first_extent_bit(&device->alloc_state, *start, - &physical_start, &physical_end, - CHUNK_ALLOCATED, NULL)) { + if (find_first_extent_bit(&device->alloc_state, *start, + &physical_start, &physical_end, + CHUNK_ALLOCATED, NULL)) { if (in_range(physical_start, *start, len) || in_range(*start, physical_start, @@ -1438,18 +1440,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, return false; } -static u64 dev_extent_search_start(struct btrfs_device *device, u64 start) +static u64 dev_extent_search_start(struct btrfs_device *device) { switch (device->fs_devices->chunk_alloc_policy) { case BTRFS_CHUNK_ALLOC_REGULAR: - return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED); + return BTRFS_DEVICE_RANGE_RESERVED; case BTRFS_CHUNK_ALLOC_ZONED: /* * We don't care about the starting region like regular * allocator, because we anyway use/reserve the first two zones * for superblock logging. */ - return ALIGN(start, device->zone_info->zone_size); + return 0; default: BUG(); } @@ -1581,15 +1583,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start, * correct usable device space, as device extent freed in current transaction * is not reported as available. */ -static int find_free_dev_extent_start(struct btrfs_device *device, - u64 num_bytes, u64 search_start, u64 *start, - u64 *len) +static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *len) { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_root *root = fs_info->dev_root; struct btrfs_key key; struct btrfs_dev_extent *dev_extent; struct btrfs_path *path; + u64 search_start; u64 hole_size; u64 max_hole_start; u64 max_hole_size; @@ -1599,7 +1601,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device, int slot; struct extent_buffer *l; - search_start = dev_extent_search_start(device, search_start); + search_start = dev_extent_search_start(device); WARN_ON(device->zone_info && !IS_ALIGNED(num_bytes, device->zone_info->zone_size)); @@ -1725,13 +1727,6 @@ out: return ret; } -int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, - u64 *start, u64 *len) -{ - /* FIXME use last free of some kind */ - return find_free_dev_extent_start(device, num_bytes, 0, start, len); -} - static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 start, u64 *dev_extent_len) @@ -6217,6 +6212,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup * stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr); } +/* + * Map one logical range to one or more physical ranges. + * + * @length: (Mandatory) mapped length of this run. + * One logical range can be split into different segments + * due to factors like zones and RAID0/5/6/10 stripe + * boundaries. + * + * @bioc_ret: (Mandatory) returned btrfs_io_context structure. + * which has one or more physical ranges (btrfs_io_stripe) + * recorded inside. + * Caller should call btrfs_put_bioc() to free it after use. + * + * @smap: (Optional) single physical range optimization. + * If the map request can be fulfilled by one single + * physical range, and this is parameter is not NULL, + * then @bioc_ret would be NULL, and @smap would be + * updated. + * + * @mirror_num_ret: (Mandatory) returned mirror number if the original + * value is 0. + * + * Mirror number 0 means to choose any live mirrors. + * + * For non-RAID56 profiles, non-zero mirror_num means + * the Nth mirror. (e.g. mirror_num 1 means the first + * copy). + * + * For RAID56 profile, mirror 1 means rebuild from P and + * the remaining data stripes. + * + * For RAID6 profile, mirror > 2 means mark another + * data/P stripe error and rebuild from the remaining + * stripes.. + * + * @need_raid_map: (Used only for integrity checker) whether the map wants + * a full stripe map (including all data and P/Q stripes) + * for RAID56. Should always be 1 except integrity checker. + */ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_io_context **bioc_ret, @@ -6391,9 +6425,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * I/O context structure. */ if (smap && num_alloc_stripes == 1 && - !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) && - (op == BTRFS_MAP_READ || !dev_replace_is_ongoing || - !dev_replace->tgtdev)) { + !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr); if (mirror_num_ret) *mirror_num_ret = mirror_num; |