Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/compression.c  |  1
-rw-r--r-- | fs/btrfs/dev-replace.c  | 35
-rw-r--r-- | fs/btrfs/extent-tree.c  | 67
-rw-r--r-- | fs/btrfs/file.c         | 16
-rw-r--r-- | fs/btrfs/inode.c        | 14
-rw-r--r-- | fs/btrfs/ioctl.c        | 36
-rw-r--r-- | fs/btrfs/props.c        | 30
-rw-r--r-- | fs/btrfs/qgroup.c       |  8
-rw-r--r-- | fs/btrfs/reada.c        |  5
-rw-r--r-- | fs/btrfs/relocation.c   | 27
-rw-r--r-- | fs/btrfs/root-tree.c    |  4
-rw-r--r-- | fs/btrfs/send.c         | 52
-rw-r--r-- | fs/btrfs/sysfs.c        | 25
-rw-r--r-- | fs/btrfs/tree-checker.c | 49
-rw-r--r-- | fs/btrfs/tree-log.c     | 21
-rw-r--r-- | fs/btrfs/xattr.c        |  6
-rw-r--r-- | fs/btrfs/zstd.c         | 20
17 files changed, 270 insertions, 146 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index daf7908d1e35..84dd4a8980c5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1008,6 +1008,7 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 	struct list_head *workspace;
 	int ret;
 
+	level = btrfs_compress_op[type]->set_level(level);
 	workspace = get_workspace(type, level);
 	ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
 						      start, pages,
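The compression.c hunk normalizes the requested level through the per-algorithm set_level() hook before the workspace lookup, so the level used to pick a workspace and the level actually used for compression always agree. A minimal userspace sketch of that normalize-then-lookup pattern (all names here are stand-ins, not the kernel API):

```c
#include <stdio.h>

/* Hypothetical per-algorithm maximum, standing in for set_level(). */
#define MAX_LEVEL 15

static int set_level(int level)
{
	/* Clamp unsupported levels to a valid one, like the zstd hook does. */
	if (level <= 0)
		return 3;	/* default level */
	return level > MAX_LEVEL ? MAX_LEVEL : level;
}

static void *get_workspace(int level)
{
	static char pools[MAX_LEVEL + 1];

	return &pools[level];	/* keyed by the normalized level */
}

int main(void)
{
	int level = set_level(42);		/* normalize first... */
	void *ws = get_workspace(level);	/* ...then look up */

	printf("using level %d, workspace %p\n", level, ws);
	return 0;
}
```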
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 55c15f31d00d..ee0989c7e3a9 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -603,17 +603,33 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	}
 	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
-	trans = btrfs_start_transaction(root, 0);
-	if (IS_ERR(trans)) {
-		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
-		return PTR_ERR(trans);
+	/*
+	 * We have to use this loop approach because at this point src_device
+	 * has to be available for transaction commit to complete, yet new
+	 * chunks shouldn't be allocated on the device.
+	 */
+	while (1) {
+		trans = btrfs_start_transaction(root, 0);
+		if (IS_ERR(trans)) {
+			mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+			return PTR_ERR(trans);
+		}
+		ret = btrfs_commit_transaction(trans);
+		WARN_ON(ret);
+
+		/* Prevent write_all_supers() during the finishing procedure */
+		mutex_lock(&fs_info->fs_devices->device_list_mutex);
+		/* Prevent new chunks being allocated on the source device */
+		mutex_lock(&fs_info->chunk_mutex);
+
+		if (!list_empty(&src_device->post_commit_list)) {
+			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+			mutex_unlock(&fs_info->chunk_mutex);
+		} else {
+			break;
+		}
 	}
-	ret = btrfs_commit_transaction(trans);
-	WARN_ON(ret);
 
-	/* keep away write_all_supers() during the finishing procedure */
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	mutex_lock(&fs_info->chunk_mutex);
 	down_write(&dev_replace->rwsem);
 	dev_replace->replace_state =
 		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
 			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -662,7 +678,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	btrfs_device_set_disk_total_bytes(tgt_device,
 					  src_device->disk_total_bytes);
 	btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
-	ASSERT(list_empty(&src_device->post_commit_list));
 	tgt_device->commit_total_bytes = src_device->commit_total_bytes;
 	tgt_device->commit_bytes_used = src_device->bytes_used;
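The new dev-replace loop keeps committing transactions until the source device's post_commit_list drains, and only breaks out while still holding both mutexes, so no new chunk can land on the source device afterwards. A compilable sketch of that commit-until-drained shape, with the kernel primitives replaced by stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

static int pending = 3;	/* stand-in for src_device->post_commit_list */

static void commit_transaction(void)
{
	if (pending > 0)
		pending--;	/* a commit processes queued post-commit work */
}

static bool post_commit_list_empty(void)
{
	return pending == 0;
}

int main(void)
{
	while (1) {
		commit_transaction();
		/*
		 * Take the locks here, as the kernel code takes
		 * device_list_mutex and chunk_mutex.
		 */
		if (post_commit_list_empty())
			break;	/* exit with the locks still held */
		/* otherwise drop the locks and commit again */
	}
	printf("no post-commit work left; safe to finish the replace\n");
	return 0;
}
```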
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f79e477a378e..5faf057f6f37 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -757,12 +757,14 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 }
 
 static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
-			     struct btrfs_ref *ref)
+			     struct btrfs_ref *ref, int sign)
 {
 	struct btrfs_space_info *space_info;
-	s64 num_bytes = -ref->len;
+	s64 num_bytes;
 	u64 flags;
 
+	ASSERT(sign == 1 || sign == -1);
+	num_bytes = sign * ref->len;
 	if (ref->type == BTRFS_REF_METADATA) {
 		if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
 			flags = BTRFS_BLOCK_GROUP_SYSTEM;
@@ -2063,7 +2065,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	btrfs_ref_tree_mod(fs_info, generic_ref);
 
 	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
-		add_pinned_bytes(fs_info, generic_ref);
+		add_pinned_bytes(fs_info, generic_ref, -1);
 
 	return ret;
 }
@@ -3882,8 +3884,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 				    info->space_info_kobj, "%s",
 				    alloc_name(space_info->flags));
 	if (ret) {
-		percpu_counter_destroy(&space_info->total_bytes_pinned);
-		kfree(space_info);
+		kobject_put(&space_info->kobj);
 		return ret;
 	}
 
@@ -7190,7 +7191,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 	}
 out:
 	if (pin)
-		add_pinned_bytes(fs_info, &generic_ref);
+		add_pinned_bytes(fs_info, &generic_ref, 1);
 
 	if (last_ref) {
 		/*
@@ -7238,7 +7239,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 	btrfs_ref_tree_mod(fs_info, ref);
 
 	if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
-		add_pinned_bytes(fs_info, ref);
+		add_pinned_bytes(fs_info, ref, 1);
 
 	return ret;
 }
@@ -10830,17 +10831,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	remove_em = (atomic_read(&block_group->trimming) == 0);
 	spin_unlock(&block_group->lock);
 
-	if (remove_em) {
-		struct extent_map_tree *em_tree;
-
-		em_tree = &fs_info->mapping_tree.map_tree;
-		write_lock(&em_tree->lock);
-		remove_extent_mapping(em_tree, em);
-		write_unlock(&em_tree->lock);
-		/* once for the tree */
-		free_extent_map(em);
-	}
-
 	mutex_unlock(&fs_info->chunk_mutex);
 
 	ret = remove_block_group_free_space(trans, block_group);
@@ -10857,6 +10847,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		goto out;
 
 	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out;
+
+	if (remove_em) {
+		struct extent_map_tree *em_tree;
+
+		em_tree = &fs_info->mapping_tree.map_tree;
+		write_lock(&em_tree->lock);
+		remove_extent_mapping(em_tree, em);
+		write_unlock(&em_tree->lock);
+		/* once for the tree */
+		free_extent_map(em);
+	}
 out:
 	if (remove_rsv)
 		btrfs_delayed_refs_rsv_release(fs_info, 1);
@@ -11136,13 +11139,11 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
  * it while performing the free space search since we have already
  * held back allocations.
  */
-static int btrfs_trim_free_extents(struct btrfs_device *device,
-				   struct fstrim_range *range, u64 *trimmed)
+static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
 {
-	u64 start, len = 0, end = 0;
+	u64 start = SZ_1M, len = 0, end = 0;
 	int ret;
 
-	start = max_t(u64, range->start, SZ_1M);
 	*trimmed = 0;
 
 	/* Discard not supported = nothing to do. */
@@ -11185,22 +11186,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 			break;
 		}
 
-		/* Keep going until we satisfy minlen or reach end of space */
-		if (len < range->minlen) {
-			mutex_unlock(&fs_info->chunk_mutex);
-			start += len;
-			continue;
-		}
-
-		/* If we are out of the passed range break */
-		if (start > range->start + range->len - 1) {
-			mutex_unlock(&fs_info->chunk_mutex);
-			break;
-		}
-
-		start = max(range->start, start);
-		len = min(range->len, len);
-
 		ret = btrfs_issue_discard(device->bdev, start, len,
 					  &bytes);
 		if (!ret)
@@ -11215,10 +11200,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 		start += len;
 		*trimmed += bytes;
 
-		/* We've trimmed enough */
-		if (*trimmed >= range->len)
-			break;
-
 		if (fatal_signal_pending(current)) {
 			ret = -ERESTARTSYS;
 			break;
@@ -11302,7 +11283,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	devices = &fs_info->fs_devices->devices;
 	list_for_each_entry(device, devices, dev_list) {
-		ret = btrfs_trim_free_extents(device, range, &group_trimmed);
+		ret = btrfs_trim_free_extents(device, &group_trimmed);
 		if (ret) {
 			dev_failed++;
 			dev_ret = ret;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7e85dca0e6f2..89f5be2bfb43 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2068,6 +2068,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	u64 len;
 
 	/*
+	 * If the inode needs a full sync, make sure we use a full range to
+	 * avoid log tree corruption, due to hole detection racing with ordered
+	 * extent completion for adjacent ranges, and assertion failures during
+	 * hole detection.
+	 */
+	if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+		     &BTRFS_I(inode)->runtime_flags)) {
+		start = 0;
+		end = LLONG_MAX;
+	}
+
+	/*
 	 * The range length can be represented by u64, we have to do the typecasts
 	 * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
 	 */
@@ -2554,10 +2566,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
 	ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
 					  &cached_state);
-	if (ret) {
-		inode_unlock(inode);
+	if (ret)
 		goto out_only_mutex;
-	}
 
 	path = btrfs_alloc_path();
 	if (!path) {
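The btrfs_sync_file() hunk above widens the requested range to the whole file whenever the inode is flagged for a full sync, which sidesteps the hole-detection races described in the comment. A tiny sketch of that range widening (userspace stand-in, not the kernel function):

```c
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Model of the range widening btrfs_sync_file() does on a full sync. */
static void choose_sync_range(bool needs_full_sync, long long *start,
			      long long *end)
{
	if (needs_full_sync) {
		*start = 0;
		*end = LLONG_MAX;	/* log the whole inode */
	}
}

int main(void)
{
	long long start = 4096, end = 8191;

	choose_sync_range(true, &start, &end);
	printf("syncing [%lld, %lld]\n", start, end);
	return 0;
}
```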
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9aba9660efe5..a2aabdb85226 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6433,8 +6433,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 	btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
 			   name_len * 2);
 	inode_inc_iversion(&parent_inode->vfs_inode);
-	parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
-		current_time(&parent_inode->vfs_inode);
+	/*
+	 * If we are replaying a log tree, we do not want to update the mtime
+	 * and ctime of the parent directory with the current time, since the
+	 * log replay procedure is responsible for setting them to their correct
+	 * values (the ones it had when the fsync was done).
+	 */
+	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
+		struct timespec64 now = current_time(&parent_inode->vfs_inode);
+
+		parent_inode->vfs_inode.i_mtime = now;
+		parent_inode->vfs_inode.i_ctime = now;
+	}
 	ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6dafa857bbb9..cfeff1b8dce0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 	struct btrfs_inode *binode = BTRFS_I(inode);
 	struct btrfs_root *root = binode->root;
 	struct btrfs_trans_handle *trans;
-	unsigned int fsflags;
+	unsigned int fsflags, old_fsflags;
 	int ret;
 	const char *comp = NULL;
 	u32 binode_flags = binode->flags;
@@ -212,13 +212,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 
 	inode_lock(inode);
 	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
-	if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
-	    (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
-		if (!capable(CAP_LINUX_IMMUTABLE)) {
-			ret = -EPERM;
-			goto out_unlock;
-		}
-	}
+	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+	ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+	if (ret)
+		goto out_unlock;
 
 	if (fsflags & FS_SYNC_FL)
 		binode_flags |= BTRFS_INODE_SYNC;
@@ -312,8 +309,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 			btrfs_abort_transaction(trans, ret);
 			goto out_end_trans;
 		}
-		set_bit(BTRFS_INODE_COPY_EVERYTHING,
-			&BTRFS_I(inode)->runtime_flags);
 	} else {
 		ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
 				     0, 0);
@@ -378,9 +373,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
 	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
 	struct fsxattr fa;
 
-	memset(&fa, 0, sizeof(fa));
-	fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
-
+	simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
 	if (copy_to_user(arg, &fa, sizeof(fa)))
 		return -EFAULT;
 
@@ -393,7 +386,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
 	struct btrfs_inode *binode = BTRFS_I(inode);
 	struct btrfs_root *root = binode->root;
 	struct btrfs_trans_handle *trans;
-	struct fsxattr fa;
+	struct fsxattr fa, old_fa;
 	unsigned old_flags;
 	unsigned old_i_flags;
 	int ret = 0;
@@ -404,7 +397,6 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
 	if (btrfs_root_readonly(root))
 		return -EROFS;
 
-	memset(&fa, 0, sizeof(fa));
 	if (copy_from_user(&fa, arg, sizeof(fa)))
 		return -EFAULT;
 
@@ -424,13 +416,11 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
 	old_flags = binode->flags;
 	old_i_flags = inode->i_flags;
 
-	/* We need the capabilities to change append-only or immutable inode */
-	if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
-	     (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
-	    !capable(CAP_LINUX_IMMUTABLE)) {
-		ret = -EPERM;
+	simple_fill_fsxattr(&old_fa,
+			    btrfs_inode_flags_to_xflags(binode->flags));
+	ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
+	if (ret)
 		goto out_unlock;
-	}
 
 	if (fa.fsx_xflags & FS_XFLAG_SYNC)
 		binode->flags |= BTRFS_INODE_SYNC;
@@ -2930,8 +2920,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	inode_lock(inode);
 	err = btrfs_delete_subvolume(dir, dentry);
 	inode_unlock(inode);
-	if (!err)
+	if (!err) {
+		fsnotify_rmdir(dir, dentry);
 		d_delete(dentry);
+	}
 
 out_dput:
 	dput(dentry);
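Both ioctl paths now delegate the append-only/immutable permission check to the common VFS helpers (vfs_ioc_setflags_prepare() and vfs_ioc_fssetxattr_check()) instead of open-coding it. A sketch of the core rule those helpers centralize, with a fake capability check standing in for capable(CAP_LINUX_IMMUTABLE):

```c
#include <stdbool.h>
#include <stdio.h>

#define FS_IMMUTABLE_FL 0x00000010
#define FS_APPEND_FL    0x00000020

/* Pretend capability check; the kernel uses capable(CAP_LINUX_IMMUTABLE). */
static bool capable_linux_immutable(void)
{
	return false;
}

/*
 * Core of what vfs_ioc_setflags_prepare() enforces: only a privileged
 * task may flip the append-only or immutable bits.
 */
static int setflags_prepare(unsigned int oldflags, unsigned int flags)
{
	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
	    !capable_linux_immutable())
		return -1;	/* -EPERM in the kernel */
	return 0;
}

int main(void)
{
	printf("%d\n", setflags_prepare(0, FS_IMMUTABLE_FL));	      /* denied */
	printf("%d\n", setflags_prepare(FS_APPEND_FL, FS_APPEND_FL)); /* ok */
	return 0;
}
```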
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index ca2716917e37..a9e2e66152ee 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -332,6 +332,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 	int i;
+	bool need_reserve = false;
 
 	if (!test_bit(BTRFS_INODE_HAS_PROPS,
 		      &BTRFS_I(parent)->runtime_flags))
@@ -357,11 +358,20 @@ static int inherit_props(struct btrfs_trans_handle *trans,
 		if (ret)
 			continue;
 
-		num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-		ret = btrfs_block_rsv_add(root, trans->block_rsv,
-					  num_bytes, BTRFS_RESERVE_NO_FLUSH);
-		if (ret)
-			return ret;
+		/*
+		 * Currently callers should be reserving 1 item for properties,
+		 * since we only have 1 property that we currently support. If
+		 * we add more in the future we need to try and reserve more
+		 * space for them. But we should also revisit how we do space
+		 * reservations if we do add more properties in the future.
+		 */
+		if (need_reserve) {
+			num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
+			ret = btrfs_block_rsv_add(root, trans->block_rsv,
+					num_bytes, BTRFS_RESERVE_NO_FLUSH);
+			if (ret)
+				return ret;
+		}
 
 		ret = btrfs_setxattr(trans, inode, h->xattr_name, value,
 				     strlen(value), 0);
@@ -375,9 +385,13 @@ static int inherit_props(struct btrfs_trans_handle *trans,
 				  &BTRFS_I(inode)->runtime_flags);
 		}
 
-		btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
-		if (ret)
-			return ret;
+		if (need_reserve) {
+			btrfs_block_rsv_release(fs_info, trans->block_rsv,
+						num_bytes);
+			if (ret)
+				return ret;
+		}
+		need_reserve = true;
 	}
 
 	return 0;
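inherit_props() now assumes the caller already reserved space for the first property and only reserves for later iterations; need_reserve starts out false and flips to true at the end of every pass. The skeleton of that skip-the-first-reservation loop, reduced to prints:

```c
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	const char *props[] = { "btrfs.compression" };	/* only one today */
	bool need_reserve = false;	/* first item is pre-reserved by caller */

	for (unsigned int i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
		if (need_reserve)
			printf("reserve metadata for %s\n", props[i]);
		printf("set %s\n", props[i]);
		if (need_reserve)
			printf("release reservation for %s\n", props[i]);
		need_reserve = true;	/* every later pass pays its own way */
	}
	return 0;
}
```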
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2f708f2c4e67..3e6ffbbd8b0a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3830,7 +3830,13 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
 			subvol_slot);
 	block->last_snapshot = last_snapshot;
 	block->level = level;
-	if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
+
+	/*
+	 * If we have bg == NULL, we're called from btrfs_recover_relocation(),
+	 * no one else can modify tree blocks, thus the qgroup will not change
+	 * no matter the value of trace_leaf.
+	 */
+	if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
 		block->trace_leaf = true;
 	else
 		block->trace_leaf = false;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 10d9589001a9..bb5bd49573b4 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -747,6 +747,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 	u64 total = 0;
 	int i;
 
+again:
 	do {
 		enqueued = 0;
 		mutex_lock(&fs_devices->device_list_mutex);
@@ -758,6 +759,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 		mutex_unlock(&fs_devices->device_list_mutex);
 		total += enqueued;
 	} while (enqueued && total < 10000);
+	if (fs_devices->seed) {
+		fs_devices = fs_devices->seed;
+		goto again;
+	}
 
 	if (enqueued == 0)
 		return;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a459ecddcce4..22a3c69864fa 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2177,22 +2177,30 @@ static int clean_dirty_subvols(struct reloc_control *rc)
 	struct btrfs_root *root;
 	struct btrfs_root *next;
 	int ret = 0;
+	int ret2;
 
 	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
 				 reloc_dirty_list) {
-		struct btrfs_root *reloc_root = root->reloc_root;
+		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+			/* Merged subvolume, cleanup its reloc root */
+			struct btrfs_root *reloc_root = root->reloc_root;
 
-		clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
-		list_del_init(&root->reloc_dirty_list);
-		root->reloc_root = NULL;
-		if (reloc_root) {
-			int ret2;
+			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+			list_del_init(&root->reloc_dirty_list);
+			root->reloc_root = NULL;
+			if (reloc_root) {
 
-			ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
+				ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
+				if (ret2 < 0 && !ret)
+					ret = ret2;
+			}
+			btrfs_put_fs_root(root);
+		} else {
+			/* Orphan reloc tree, just clean it up */
+			ret2 = btrfs_drop_snapshot(root, NULL, 0, 1);
 			if (ret2 < 0 && !ret)
 				ret = ret2;
 		}
-		btrfs_put_fs_root(root);
 	}
 	return ret;
 }
@@ -2480,6 +2488,9 @@ again:
 		}
 	} else {
 		list_del_init(&reloc_root->root_list);
+		/* Don't forget to queue this reloc root for cleanup */
+		list_add_tail(&reloc_root->reloc_dirty_list,
+			      &rc->dirty_subvol_roots);
 	}
 }
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 1b9a5d0de139..22124122728c 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -132,10 +132,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 		return -ENOMEM;
 
 	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
-	if (ret < 0) {
-		btrfs_abort_transaction(trans, ret);
+	if (ret < 0)
 		goto out;
-	}
 
 	if (ret > 0) {
 		btrfs_crit(fs_info,
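Among the hunks above, the reada.c change makes the readahead machine walk the whole chain of seed filesystems instead of stopping at the sprout. A self-contained sketch of that goto-driven walk over a seed chain (simplified struct, not the kernel one):

```c
#include <stdio.h>

struct fs_devices {
	const char *name;
	struct fs_devices *seed;	/* next filesystem in the seed chain */
};

static void reada_start(struct fs_devices *fs_devices)
{
again:
	printf("enqueue readahead on %s\n", fs_devices->name);
	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;	/* repeat for each seed filesystem */
	}
}

int main(void)
{
	struct fs_devices seed = { "seed", NULL };
	struct fs_devices sprout = { "sprout", &seed };

	reada_start(&sprout);
	return 0;
}
```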
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index dd38dfe174df..f7fe4770f0e5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4999,6 +4999,12 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 	if (offset >= sctx->cur_inode_size)
 		return 0;
 
+	/*
+	 * Don't go beyond the inode's i_size due to prealloc extents that start
+	 * after the i_size.
+	 */
+	end = min_t(u64, end, sctx->cur_inode_size);
+
 	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
 		return send_update_extent(sctx, offset, end - offset);
 
@@ -5218,10 +5224,50 @@ static int clone_range(struct send_ctx *sctx,
 		clone_len = min_t(u64, ext_len, len);
 
 		if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
-		    clone_data_offset == data_offset)
-			ret = send_clone(sctx, offset, clone_len, clone_root);
-		else
+		    clone_data_offset == data_offset) {
+			const u64 src_end = clone_root->offset + clone_len;
+			const u64 sectorsize = SZ_64K;
+
+			/*
+			 * We can't clone the last block, when its size is not
+			 * sector size aligned, into the middle of a file. If we
+			 * do so, the receiver will get a failure (-EINVAL) when
+			 * trying to clone or will silently corrupt the data in
+			 * the destination file if it's on a kernel without the
+			 * fix introduced by commit ac765f83f1397646
+			 * ("Btrfs: fix data corruption due to cloning of eof
+			 * block").
+			 *
+			 * So issue a clone of the aligned down range plus a
+			 * regular write for the eof block, if we hit that case.
+			 *
+			 * Also, we use the maximum possible sector size, 64K,
+			 * because we don't know what's the sector size of the
+			 * filesystem that receives the stream, so we have to
+			 * assume the largest possible sector size.
+			 */
+			if (src_end == clone_src_i_size &&
+			    !IS_ALIGNED(src_end, sectorsize) &&
+			    offset + clone_len < sctx->cur_inode_size) {
+				u64 slen;
+
+				slen = ALIGN_DOWN(src_end - clone_root->offset,
+						  sectorsize);
+				if (slen > 0) {
+					ret = send_clone(sctx, offset, slen,
+							 clone_root);
+					if (ret < 0)
+						goto out;
+				}
+				ret = send_extent_data(sctx, offset + slen,
+						       clone_len - slen);
+			} else {
+				ret = send_clone(sctx, offset, clone_len,
+						 clone_root);
+			}
+		} else {
 			ret = send_extent_data(sctx, offset, clone_len);
+		}
 
 		if (ret < 0)
 			goto out;
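The send.c logic clones the aligned-down prefix and falls back to a plain write for the unaligned tail at the source file's end, assuming a worst-case 64K sector size on the receiver. A compilable sketch of just that split arithmetic (sample numbers, not from the patch):

```c
#include <stdint.h>
#include <stdio.h>

#define SZ_64K			0x10000ULL
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

int main(void)
{
	uint64_t clone_root_offset = 0;
	uint64_t clone_len = 200 * 1024 + 37;	/* source ends unaligned */
	uint64_t src_end = clone_root_offset + clone_len;

	if (!IS_ALIGNED(src_end, SZ_64K)) {
		/* Clone the aligned prefix, write the unaligned tail. */
		uint64_t slen = ALIGN_DOWN(src_end - clone_root_offset, SZ_64K);

		printf("clone %llu bytes, then write %llu bytes\n",
		       (unsigned long long)slen,
		       (unsigned long long)(clone_len - slen));
	}
	return 0;
}
```

With the sample length of 204837 bytes this clones 196608 bytes (3 × 64K) and writes the remaining 8229, which is exactly the shape of stream the comment describes.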
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 5a5930e3d32b..c1dfc97893ba 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -303,11 +303,12 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 	return snprintf(buf, PAGE_SIZE, "%llu\n", val);
 }
 
-static struct attribute *raid_attributes[] = {
+static struct attribute *raid_attrs[] = {
 	BTRFS_ATTR_PTR(raid, total_bytes),
 	BTRFS_ATTR_PTR(raid, used_bytes),
 	NULL
 };
+ATTRIBUTE_GROUPS(raid);
 
 static void release_raid_kobj(struct kobject *kobj)
 {
@@ -317,7 +318,7 @@ static void release_raid_kobj(struct kobject *kobj)
 struct kobj_type btrfs_raid_ktype = {
 	.sysfs_ops = &kobj_sysfs_ops,
 	.release = release_raid_kobj,
-	.default_attrs = raid_attributes,
+	.default_groups = raid_groups,
 };
 
 #define SPACE_INFO_ATTR(field)						\
@@ -364,6 +365,7 @@ static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
 	NULL,
 };
+ATTRIBUTE_GROUPS(space_info);
 
 static void space_info_release(struct kobject *kobj)
 {
@@ -375,7 +377,7 @@ static void space_info_release(struct kobject *kobj)
 struct kobj_type space_info_ktype = {
 	.sysfs_ops = &kobj_sysfs_ops,
 	.release = space_info_release,
-	.default_attrs = space_info_attrs,
+	.default_groups = space_info_groups,
 };
 
 static const struct attribute *allocation_attrs[] = {
@@ -825,7 +827,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
 	fs_devs->fsid_kobj.kset = btrfs_kset;
 	error = kobject_init_and_add(&fs_devs->fsid_kobj,
 				&btrfs_ktype, parent, "%pU", fs_devs->fsid);
-	return error;
+	if (error) {
+		kobject_put(&fs_devs->fsid_kobj);
+		return error;
+	}
+
+	return 0;
 }
 
 int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
@@ -905,12 +912,10 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
 	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
 }
 
-static int btrfs_init_debugfs(void)
+static void btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
 	btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL);
-	if (!btrfs_debugfs_root_dentry)
-		return -ENOMEM;
 
 	/*
 	 * Example code, how to export data through debugfs.
@@ -924,7 +929,6 @@ static int btrfs_init_debugfs(void)
 #endif
 #endif
-	return 0;
 }
 
 int __init btrfs_init_sysfs(void)
@@ -935,9 +939,7 @@ int __init btrfs_init_sysfs(void)
 	if (!btrfs_kset)
 		return -ENOMEM;
 
-	ret = btrfs_init_debugfs();
-	if (ret)
-		goto out1;
+	btrfs_init_debugfs();
 
 	init_feature_attrs();
 	ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
@@ -954,7 +956,6 @@ out_remove_group:
 	sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
 out2:
 	debugfs_remove_recursive(btrfs_debugfs_root_dentry);
-out1:
 	kset_unregister(btrfs_kset);
 
 	return ret;
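The sysfs.c conversion relies on ATTRIBUTE_GROUPS(raid) to wrap raid_attrs into a raid_groups list that .default_groups can point at. Roughly what that macro expands to, modeled here with simplified standalone types (the real kernel structs differ):

```c
#include <stddef.h>
#include <stdio.h>

struct attribute {
	const char *name;
};

struct attribute_group {
	const struct attribute *const *attrs;
};

static const struct attribute total_bytes = { "total_bytes" };
static const struct attribute used_bytes = { "used_bytes" };

/* The NULL-terminated attrs array the driver already had. */
static const struct attribute *const raid_attrs[] = {
	&total_bytes,
	&used_bytes,
	NULL
};

/*
 * Roughly what ATTRIBUTE_GROUPS(raid) generates: one group wrapping the
 * attrs array, plus a NULL-terminated list of groups for .default_groups.
 */
static const struct attribute_group raid_group = { .attrs = raid_attrs };
static const struct attribute_group *const raid_groups[] = {
	&raid_group,
	NULL
};

int main(void)
{
	for (const struct attribute *const *a = raid_groups[0]->attrs; *a; a++)
		printf("attr: %s\n", (*a)->name);
	return 0;
}
```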
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 748cd1598255..96fce4bef4e7 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -107,8 +107,26 @@ static void file_extent_err(const struct extent_buffer *eb, int slot,
 	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));  \
 })
 
+static u64 file_extent_end(struct extent_buffer *leaf,
+			   struct btrfs_key *key,
+			   struct btrfs_file_extent_item *extent)
+{
+	u64 end;
+	u64 len;
+
+	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
+		len = btrfs_file_extent_ram_bytes(leaf, extent);
+		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
+	} else {
+		len = btrfs_file_extent_num_bytes(leaf, extent);
+		end = key->offset + len;
+	}
+	return end;
+}
+
 static int check_extent_data_item(struct extent_buffer *leaf,
-				  struct btrfs_key *key, int slot)
+				  struct btrfs_key *key, int slot,
+				  struct btrfs_key *prev_key)
 {
 	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_file_extent_item *fi;
@@ -188,6 +206,28 @@ static int check_extent_data_item(struct extent_buffer *leaf,
 	    CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
 	    CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
 		return -EUCLEAN;
+
+	/*
+	 * Check that no two consecutive file extent items, in the same leaf,
+	 * present ranges that overlap each other.
+	 */
+	if (slot > 0 &&
+	    prev_key->objectid == key->objectid &&
+	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
+		struct btrfs_file_extent_item *prev_fi;
+		u64 prev_end;
+
+		prev_fi = btrfs_item_ptr(leaf, slot - 1,
+					 struct btrfs_file_extent_item);
+		prev_end = file_extent_end(leaf, prev_key, prev_fi);
+		if (prev_end > key->offset) {
+			file_extent_err(leaf, slot - 1,
+"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
+					prev_end, key->offset);
+			return -EUCLEAN;
+		}
+	}
+
 	return 0;
 }
 
@@ -774,14 +814,15 @@ static int check_inode_item(struct extent_buffer *leaf,
  * Common point to switch the item-specific validation.
  */
 static int check_leaf_item(struct extent_buffer *leaf,
-			   struct btrfs_key *key, int slot)
+			   struct btrfs_key *key, int slot,
+			   struct btrfs_key *prev_key)
 {
 	int ret = 0;
 	struct btrfs_chunk *chunk;
 
 	switch (key->type) {
 	case BTRFS_EXTENT_DATA_KEY:
-		ret = check_extent_data_item(leaf, key, slot);
+		ret = check_extent_data_item(leaf, key, slot, prev_key);
 		break;
 	case BTRFS_EXTENT_CSUM_KEY:
 		ret = check_csum_item(leaf, key, slot);
@@ -928,7 +969,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 			 * Check if the item size and content meet other
 			 * criteria
 			 */
-			ret = check_leaf_item(leaf, &key, slot);
+			ret = check_leaf_item(leaf, &key, slot, &prev_key);
 			if (ret < 0)
 				return ret;
 		}
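The tree-checker addition computes where the previous file extent ends, rounding inline extents up to the sector size, and rejects the leaf if the next item's key offset starts before that end. A standalone model of that overlap test with sample values:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE 4096ULL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct file_extent {
	uint64_t offset;	/* key offset: logical file offset */
	uint64_t num_bytes;	/* length; ram_bytes for inline extents */
	bool inline_extent;
};

static uint64_t file_extent_end(const struct file_extent *fe)
{
	/* Inline extents cover at least one full sector on the read side. */
	if (fe->inline_extent)
		return ALIGN(fe->offset + fe->num_bytes, SECTORSIZE);
	return fe->offset + fe->num_bytes;
}

int main(void)
{
	struct file_extent prev = { 0, 100, true };	/* ends at 4096 aligned */
	struct file_extent cur = { 2048, 4096, false };	/* starts inside prev */

	if (file_extent_end(&prev) > cur.offset)
		printf("overlap detected: -EUCLEAN\n");
	return 0;
}
```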
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6adcd8a2c5c7..3fc8d854d7fb 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3110,6 +3110,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	log->log_transid = root->log_transid;
 	root->log_start_pid = 0;
 	/*
+	 * Update or create log root item under the root's log_mutex to prevent
+	 * races with concurrent log syncs that can lead to failure to update
+	 * log root item because it was not created yet.
+	 */
+	ret = update_log_root(trans, log);
+	/*
 	 * IO has been started, blocks of the log tree have WRITTEN flag set
 	 * in their headers. new modifications of the log will be written to
 	 * new positions. so it's safe to allow log writers to go in.
@@ -3128,8 +3134,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
 	mutex_unlock(&log_root_tree->log_mutex);
 
-	ret = update_log_root(trans, log);
-
 	mutex_lock(&log_root_tree->log_mutex);
 	if (atomic_dec_and_test(&log_root_tree->log_writers)) {
 		/* atomic_dec_and_test implies a barrier */
@@ -4182,6 +4186,7 @@ fill_holes:
 						       *last_extent, 0,
 						       0, len, 0, len,
 						       0, 0, 0);
+				*last_extent += len;
 			}
 		}
 	}
@@ -5477,7 +5482,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	struct dentry *old_parent = NULL;
-	struct btrfs_inode *orig_inode = inode;
 
 	/*
 	 * for regular files, if its inode is already on disk, we don't
@@ -5497,16 +5501,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 	}
 
 	while (1) {
-		/*
-		 * If we are logging a directory then we start with our inode,
-		 * not our parent's inode, so we need to skip setting the
-		 * logged_trans so that further down in the log code we don't
-		 * think this inode has already been logged.
-		 */
-		if (inode != orig_inode)
-			inode->logged_trans = trans->transid;
-		smp_mb();
-
 		if (btrfs_must_commit_transaction(trans, inode)) {
 			ret = 1;
 			break;
@@ -6383,7 +6377,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 	 * if this directory was already logged any new
 	 * names for this file/dir will get recorded
 	 */
-	smp_mb();
 	if (dir->logged_trans == trans->transid)
 		return;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 78b6ba2029e8..95d9aebff2c4 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -213,6 +213,9 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
 	}
 out:
 	btrfs_free_path(path);
+	if (!ret)
+		set_bit(BTRFS_INODE_COPY_EVERYTHING,
+			&BTRFS_I(inode)->runtime_flags);
 	return ret;
 }
 
@@ -236,7 +239,6 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
 
 	inode_inc_iversion(inode);
 	inode->i_ctime = current_time(inode);
-	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 	ret = btrfs_update_inode(trans, root, inode);
 	BUG_ON(ret);
 out:
@@ -388,8 +390,6 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
 	if (!ret) {
 		inode_inc_iversion(inode);
 		inode->i_ctime = current_time(inode);
-		set_bit(BTRFS_INODE_COPY_EVERYTHING,
-			&BTRFS_I(inode)->runtime_flags);
 		ret = btrfs_update_inode(trans, root, inode);
 		BUG_ON(ret);
 	}
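The xattr.c change moves BTRFS_INODE_COPY_EVERYTHING into btrfs_setxattr() itself, so the flag is set in exactly one place and only when the xattr change actually succeeded. A small sketch of that set-on-success-at-the-sink pattern (stand-in flag and error handling):

```c
#include <stdbool.h>
#include <stdio.h>

static bool copy_everything;	/* stand-in for the inode runtime flag */

/*
 * Set the flag at the single point that performs the change, and only
 * when the change actually stuck, instead of in every caller.
 */
static int setxattr(const char *name, bool fail)
{
	int ret = fail ? -1 : 0;	/* -1 models a kernel error code */

	if (!ret)
		copy_everything = true;
	printf("setxattr %s -> %d\n", name, ret);
	return ret;
}

int main(void)
{
	setxattr("user.test", true);	/* failure: flag stays clear */
	printf("flag: %d\n", copy_everything);
	setxattr("user.test", false);	/* success: flag is set */
	printf("flag: %d\n", copy_everything);
	return 0;
}
```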
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index a6ff07cf11d5..3837ca180d52 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
 	struct list_head *pos, *next;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 
 	if (list_empty(&wsm.lru_list)) {
-		spin_unlock(&wsm.lock);
+		spin_unlock_bh(&wsm.lock);
 		return;
 	}
 
@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 	if (!list_empty(&wsm.lru_list))
 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
 
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 }
 
 /*
@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void)
 	struct workspace *workspace;
 	int i;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
 		while (!list_empty(&wsm.idle_ws[i])) {
 			workspace = container_of(wsm.idle_ws[i].next,
@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void)
 			zstd_free_workspace(&workspace->list);
 		}
 	}
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	del_timer_sync(&wsm.timer);
 }
@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
 	struct workspace *workspace;
 	int i = level - 1;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
 		if (!list_empty(&wsm.idle_ws[i])) {
 			ws = wsm.idle_ws[i].next;
@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level)
 			list_del(&workspace->lru_list);
 			if (list_empty(&wsm.idle_ws[i]))
 				clear_bit(i, &wsm.active_map);
-			spin_unlock(&wsm.lock);
+			spin_unlock_bh(&wsm.lock);
 			return ws;
 		}
 	}
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	return NULL;
 }
@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws)
 {
 	struct workspace *workspace = list_to_workspace(ws);
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 
 	/* A node is only taken off the lru if we are the corresponding level */
 	if (workspace->req_level == workspace->level) {
@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws)
 	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
 	workspace->req_level = 0;
 
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
 		cond_wake_up(&wsm.wait);
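The zstd.c conversion to spin_lock_bh() exists because zstd_reclaim_timer_fn() runs in softirq context: if process context held wsm.lock without disabling bottom halves, the timer could fire on the same CPU and spin forever on a lock that CPU can never release. A schematic sketch of the rule, with stand-in types rather than the kernel spinlock API:

```c
#include <stdio.h>

/*
 * Stand-in lock. In the kernel, the _bh variants additionally disable
 * softirqs on the local CPU, so a timer callback (softirq context)
 * cannot interrupt a process-context holder of the same lock.
 */
struct spinlock {
	int taken;
};

static void spin_lock_bh(struct spinlock *l)
{
	/* disable softirqs, then take the lock */
	l->taken = 1;
}

static void spin_unlock_bh(struct spinlock *l)
{
	l->taken = 0;
	/* re-enable softirqs; a pending reclaim timer may now run safely */
}

int main(void)
{
	struct spinlock wsm_lock = { 0 };

	spin_lock_bh(&wsm_lock);	/* the timer cannot preempt us here */
	printf("workspace lists manipulated safely\n");
	spin_unlock_bh(&wsm_lock);
	return 0;
}
```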