aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c106
1 files changed, 59 insertions, 47 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ea3ec1e761e8..2801c991814f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2220,6 +2220,12 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
if (!mutex_trylock(&head->mutex)) {
+ if (path->nowait) {
+ spin_unlock(&delayed_refs->lock);
+ btrfs_put_transaction(cur_trans);
+ return -EAGAIN;
+ }
+
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
@@ -2551,17 +2557,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
return -EINVAL;
/*
- * pull in the free space cache (if any) so that our pin
- * removes the free space from the cache. We have load_only set
- * to one because the slow code to read in the free extents does check
- * the pinned extents.
+ * Fully cache the free space first so that our pin removes the free space
+ * from the cache.
*/
- btrfs_cache_block_group(cache, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret)
goto out;
@@ -2584,12 +2583,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!block_group)
return -EINVAL;
- btrfs_cache_block_group(block_group, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(block_group);
+ ret = btrfs_cache_block_group(block_group, true);
if (ret)
goto out;
@@ -2698,13 +2692,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
len = cache->start + cache->length - start;
len = min(len, end + 1 - start);
- down_read(&fs_info->commit_root_sem);
- if (start < cache->last_byte_to_unpin && return_free_space) {
- u64 add_len = min(len, cache->last_byte_to_unpin - start);
-
- btrfs_add_free_space(cache, start, add_len);
- }
- up_read(&fs_info->commit_root_sem);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
start += len;
total_unpinned += len;
@@ -3306,21 +3295,22 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
/*
- * If this is a leaf and there are tree mod log users, we may
- * have recorded mod log operations that point to this leaf.
- * So we must make sure no one reuses this leaf's extent before
- * mod log operations are applied to a node, otherwise after
- * rewinding a node using the mod log operations we get an
- * inconsistent btree, as the leaf's extent may now be used as
- * a node or leaf for another different btree.
+ * If there are tree mod log users we may have recorded mod log
+ * operations for this node. If we re-allocate this node we
+ * could replay operations on this node that happened when it
+ * existed in a completely different root. For example if it
+ * was part of root A, then was reallocated to root B, and we
+ * are doing a btrfs_old_search_slot(root b), we could replay
+ * operations that happened when the block was part of root A,
+ * giving us an inconsistent view of the btree.
+ *
* We are safe from races here because at this point no other
* node or root points to this extent buffer, so if after this
- * check a new tree mod log user joins, it will not be able to
- * find a node pointing to this leaf and record operations that
- * point to this leaf.
+ * check a new tree mod log user joins we will not have an
+ * existing log of operations on this node that we have to
+ * contend with.
*/
- if (btrfs_header_level(buf) == 0 &&
- test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
+ if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
must_pin = true;
if (must_pin || btrfs_is_zoned(fs_info)) {
@@ -3816,7 +3806,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
block_group->start == fs_info->data_reloc_bg ||
fs_info->data_reloc_bg == 0);
- if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
+ if (block_group->ro ||
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
ret = 1;
goto out;
}
@@ -3893,7 +3884,7 @@ out:
* regular extents) at the same time to the same zone, which
* easily break the write pointer.
*/
- block_group->zoned_data_reloc_ongoing = 1;
+ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
fs_info->data_reloc_bg = 0;
}
spin_unlock(&fs_info->relocation_bg_lock);
@@ -4399,7 +4390,7 @@ have_block_group:
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
- ret = btrfs_cache_block_group(block_group, 0);
+ ret = btrfs_cache_block_group(block_group, false);
/*
* If we get ENOMEM here or something else we want to
@@ -4867,6 +4858,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
+ u64 lockdep_owner = owner;
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
if (IS_ERR(buf))
@@ -4886,11 +4878,29 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
/*
+ * The reloc trees are just snapshots, so we need them to appear to be
+ * just like any other fs tree WRT lockdep.
+ *
+ * The exception however is in replace_path() in relocation, where we
+ * hold the lock on the original fs root and then search for the reloc
+ * root. At that point we need to make sure any reloc root buffers are
+ * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
+ * lockdep happy.
+ */
+ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
+ !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
+ lockdep_owner = BTRFS_FS_TREE_OBJECTID;
+
+ /* btrfs_clean_tree_block() accesses generation field. */
+ btrfs_set_header_generation(buf, trans->transid);
+
+ /*
* This needs to stay, because we could allocate a freed block from an
* old tree into a new tree, so we need to make sure this new block is
* set to the appropriate level and owner.
*/
- btrfs_set_buffer_lockdep_class(owner, buf, level);
+ btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
+
__btrfs_tree_lock(buf, nest);
btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
@@ -5635,6 +5645,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
*/
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
{
+ const bool is_reloc_root = (root->root_key.objectid ==
+ BTRFS_TREE_RELOC_OBJECTID);
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@@ -5794,6 +5806,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
+ if (!is_reloc_root)
+ btrfs_set_last_root_drop_gen(fs_info, trans->transid);
+
btrfs_end_transaction_throttle(trans);
if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
btrfs_debug(fs_info,
@@ -5828,7 +5843,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
- if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+ if (!is_reloc_root) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
@@ -5860,6 +5875,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
btrfs_put_root(root);
root_dropped = true;
out_end_trans:
+ if (!is_reloc_root)
+ btrfs_set_last_root_drop_gen(fs_info, trans->transid);
+
btrfs_end_transaction_throttle(trans);
out_free:
kfree(wc);
@@ -6153,13 +6171,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
if (end - start >= range->minlen) {
if (!btrfs_block_group_done(cache)) {
- ret = btrfs_cache_block_group(cache, 0);
- if (ret) {
- bg_failed++;
- bg_ret = ret;
- continue;
- }
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret) {
bg_failed++;
bg_ret = ret;