From 0024652895e3479cd0d372f63b57d9581a0bdd38 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Jan 2020 09:33:01 -0500 Subject: btrfs: rename btrfs_put_fs_root and btrfs_grab_fs_root We are now using these for all roots, rename them to btrfs_put_root() and btrfs_grab_root(); Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a7bc66121330..2b4c3ca5e651 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5418,7 +5418,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } else { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); - btrfs_put_fs_root(root); + btrfs_put_root(root); } root_dropped = true; out_end_trans: -- cgit From bf31f87f71cc7a89871ab0a451c047a0c0144bf1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2020 17:34:34 +0100 Subject: btrfs: add wrapper for transaction abort predicate The status of aborted transaction can change between calls and it needs to be accessed by READ_ONCE. Add a helper that also wraps the unlikely hint. Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 +- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 25 +++++++++++++------------ fs/btrfs/transaction.h | 12 ++++++++++++ 6 files changed, 33 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 7f09147872dc..c12d6399c6d7 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2345,7 +2345,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group, return 0; } - if (trans->aborted) + if (TRANS_ABORTED(trans)) return 0; again: inode = lookup_free_space_inode(block_group, path); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 9b23883a1086..ee70584ddc45 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1139,7 +1139,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr) int ret = 0; bool count = (nr > 0); - if (trans->aborted) + if (TRANS_ABORTED(trans)) return -EIO; path = btrfs_alloc_path(); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2b4c3ca5e651..da3d9eaf3b22 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1583,7 +1583,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, int err = 0; int metadata = !extent_op->is_data; - if (trans->aborted) + if (TRANS_ABORTED(trans)) return 0; if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) @@ -1703,7 +1703,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, { int ret = 0; - if (trans->aborted) { + if (TRANS_ABORTED(trans)) { if (insert_reserved) btrfs_pin_extent(trans->fs_info, node->bytenr, node->num_bytes, 1); @@ -2191,7 +2191,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, int run_all = count == (unsigned long)-1; /* We'll clean this up in btrfs_cleanup_transaction */ - if (trans->aborted) + if (TRANS_ABORTED(trans)) return 0; if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags)) @@ -2913,7 +2913,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) else unpin = &fs_info->freed_extents[0]; - while (!trans->aborted) { + while (!TRANS_ABORTED(trans)) { struct extent_state *cached_state = NULL; mutex_lock(&fs_info->unused_bg_unpin_mutex); @@ -2950,7 +2950,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) u64 trimmed = 0; ret = -EROFS; - if (!trans->aborted) + if (!TRANS_ABORTED(trans)) ret = btrfs_discard_extent(fs_info, block_group->start, block_group->length, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6420df4e86d7..5c16b4bcde9b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -244,7 +244,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = trans->fs_info; - trans->aborted = errno; + WRITE_ONCE(trans->aborted, errno); /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ if (!trans->dirty && list_empty(&trans->new_bgs)) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bcf23b06e67f..37680351b7c3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -243,7 +243,7 @@ loop: cur_trans = fs_info->running_transaction; if (cur_trans) { - if (cur_trans->aborted) { + if (TRANS_ABORTED(cur_trans)) { spin_unlock(&fs_info->trans_lock); return cur_trans->aborted; } @@ -459,7 +459,7 @@ static inline int is_transaction_blocked(struct btrfs_transaction *trans) { return (trans->state >= TRANS_STATE_COMMIT_START && trans->state < TRANS_STATE_UNBLOCKED && - !trans->aborted); + !TRANS_ABORTED(trans)); } /* wait for commit against the current transaction to become unblocked @@ -478,7 +478,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info) wait_event(fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || - cur_trans->aborted); + TRANS_ABORTED(cur_trans)); btrfs_put_transaction(cur_trans); } else { spin_unlock(&fs_info->trans_lock); @@ -937,7 +937,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (throttle) btrfs_run_delayed_iputs(info); - if (trans->aborted || + if (TRANS_ABORTED(trans) || test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) { wake_up_process(info->transaction_kthread); err = -EIO; @@ -1794,7 +1794,8 @@ static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info, struct btrfs_transaction *trans) { wait_event(fs_info->transaction_blocked_wait, - trans->state >= TRANS_STATE_COMMIT_START || trans->aborted); + trans->state >= TRANS_STATE_COMMIT_START || + TRANS_ABORTED(trans)); } /* @@ -1806,7 +1807,8 @@ static void wait_current_trans_commit_start_and_unblock( struct btrfs_transaction *trans) { wait_event(fs_info->transaction_wait, - trans->state >= TRANS_STATE_UNBLOCKED || trans->aborted); + trans->state >= TRANS_STATE_UNBLOCKED || + TRANS_ABORTED(trans)); } /* @@ -2026,7 +2028,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) trans->dirty = true; /* Stop the commit early if ->aborted is set */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; btrfs_end_transaction(trans); return ret; @@ -2100,7 +2102,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_for_commit(cur_trans); - if (unlikely(cur_trans->aborted)) + if (TRANS_ABORTED(cur_trans)) ret = cur_trans->aborted; btrfs_put_transaction(cur_trans); @@ -2119,7 +2121,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) spin_unlock(&fs_info->trans_lock); wait_for_commit(prev_trans); - ret = prev_trans->aborted; + ret = READ_ONCE(prev_trans->aborted); btrfs_put_transaction(prev_trans); if (ret) @@ -2173,8 +2175,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1); - /* ->aborted might be set after the previous check, so check it */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; goto scrub_continue; } @@ -2292,7 +2293,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * The tasks which save the space cache and inode cache may also * update ->aborted, check it. */ - if (unlikely(READ_ONCE(cur_trans->aborted))) { + if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->reloc_mutex); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 49f7196368f5..453cea7c7a72 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -115,6 +115,10 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *orig_rsv; refcount_t use_count; unsigned int type; + /* + * Error code of transaction abort, set outside of locks and must use + * the READ_ONCE/WRITE_ONCE access + */ short aborted; bool adding_csums; bool allocating_chunk; @@ -126,6 +130,14 @@ struct btrfs_trans_handle { struct list_head new_bgs; }; +/* + * The abort status can be changed between calls and is not protected by locks. + * This accepts btrfs_transaction and btrfs_trans_handle as types. Once it's + * set to a non-zero value it does not change, so the macro should be in checks + * but is not necessary for further reads of the value. + */ +#define TRANS_ABORTED(trans) (unlikely(READ_ONCE((trans)->aborted))) + struct btrfs_pending_snapshot { struct dentry *dentry; struct inode *dir; -- cgit From b25c36f84b59a64fd5815f341b6ddbd8a8a2bb56 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:09 +0200 Subject: btrfs: Make btrfs_pin_extent take trans handle Preparation for switching pinned extent tracking to a per-transaction basis. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 17 ++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ffd99c3f64db..2f0ca1cb1fb9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2463,8 +2463,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 offset, int metadata, u64 *refs, u64 *flags); -int btrfs_pin_extent(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 num, int reserved); +int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, + int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes); int btrfs_exclude_logged_extents(struct extent_buffer *eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index da3d9eaf3b22..5a90d9d8b665 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1705,8 +1705,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, if (TRANS_ABORTED(trans)) { if (insert_reserved) - btrfs_pin_extent(trans->fs_info, node->bytenr, - node->num_bytes, 1); + btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); return 0; } @@ -1721,8 +1720,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, else BUG(); if (ret && insert_reserved) - btrfs_pin_extent(trans->fs_info, node->bytenr, - node->num_bytes, 1); + btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); return ret; } @@ -1867,8 +1865,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, spin_unlock(&delayed_refs->lock); if (head->must_insert_reserved) { - btrfs_pin_extent(fs_info, head->bytenr, - head->num_bytes, 1); + btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1); if (head->is_data) { ret = btrfs_del_csums(trans, fs_info->csum_root, head->bytenr, head->num_bytes); @@ -2612,14 +2609,12 @@ static int pin_down_extent(struct btrfs_block_group *cache, return 0; } -int btrfs_pin_extent(struct btrfs_fs_info *fs_info, +int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, int reserved) { struct btrfs_block_group *cache; - ASSERT(fs_info->running_transaction); - - cache = btrfs_lookup_block_group(fs_info, bytenr); + cache = btrfs_lookup_block_group(trans->fs_info, bytenr); BUG_ON(!cache); /* Logic error */ pin_down_extent(cache, bytenr, num_bytes, reserved); @@ -3345,7 +3340,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) (ref->type == BTRFS_REF_DATA && ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { /* unlocks the pinned mutex */ - btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1); + btrfs_pin_extent(trans, ref->bytenr, ref->len, 1); old_ref_mod = new_ref_mod = 0; ret = 0; } else if (ref->type == BTRFS_REF_METADATA) { -- cgit From 7bfc10070573591163dae7be9bb09552d4e6dee5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:12 +0200 Subject: btrfs: Make btrfs_pin_reserved_extent take transaction handle btrfs_pin_reserved_extent is now only called with a valid transaction so exploit the fact to take a transaction. This is preparation for tracking pinned extents on a per-transaction basis. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 +++++--- fs/btrfs/tree-log.c | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2f0ca1cb1fb9..3518cbd07015 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2501,7 +2501,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); -int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, +int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len); void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5a90d9d8b665..6a61e972593e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4184,14 +4184,16 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) +int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, + u64 len) { struct btrfs_block_group *cache; int ret = 0; - cache = btrfs_lookup_block_group(fs_info, start); + cache = btrfs_lookup_block_group(trans->fs_info, start); if (!cache) { - btrfs_err(fs_info, "unable to find block group for %llu", start); + btrfs_err(trans->fs_info, "unable to find block group for %llu", + start); return -ENOSPC; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c27e121603ac..8471f3c5525e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2745,7 +2745,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); - ret = btrfs_pin_reserved_extent(fs_info, + ret = btrfs_pin_reserved_extent(trans, bytenr, blocksize); if (ret) { free_extent_buffer(next); @@ -2814,7 +2814,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); - ret = btrfs_pin_reserved_extent(fs_info, + ret = btrfs_pin_reserved_extent(trans, path->nodes[*level]->start, path->nodes[*level]->len); if (ret) @@ -2896,7 +2896,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, btrfs_clean_tree_block(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); - ret = btrfs_pin_reserved_extent(fs_info, + ret = btrfs_pin_reserved_extent(trans, next->start, next->len); if (ret) goto out; -- cgit From 9fce5704542c5e97d458cc97f9cecef253f02f06 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:13 +0200 Subject: btrfs: Make btrfs_pin_extent_for_log_replay take transaction handle Preparation for refactoring pinned extents tracking. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/tree-log.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3518cbd07015..22d0cb0019d1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2465,7 +2465,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 offset, int metadata, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, int reserved); -int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes); int btrfs_exclude_logged_extents(struct extent_buffer *eb); int btrfs_cross_ref_exist(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6a61e972593e..e6ea01d76659 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2626,13 +2626,13 @@ int btrfs_pin_extent(struct btrfs_trans_handle *trans, /* * this function must be called within transaction */ -int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes) { struct btrfs_block_group *cache; int ret; - cache = btrfs_lookup_block_group(fs_info, bytenr); + cache = btrfs_lookup_block_group(trans->fs_info, bytenr); if (!cache) return -EINVAL; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8471f3c5525e..19c107be9ef6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -313,7 +313,7 @@ static int process_one_buffer(struct btrfs_root *log, } if (wc->pin) - ret = btrfs_pin_extent_for_log_replay(fs_info, eb->start, + ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start, eb->len); if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { @@ -6189,7 +6189,7 @@ again: * each subsequent pass. */ if (ret == -ENOENT) - ret = btrfs_pin_extent_for_log_replay(fs_info, + ret = btrfs_pin_extent_for_log_replay(trans, log->node->start, log->node->len); free_extent_buffer(log->node); -- cgit From 6690d07126e1eadd37fccb7e8e275b3ab7bdb524 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:14 +0200 Subject: btrfs: Make pin_down_extent take transaction handle All callers have a reference to a transaction handle so pass it to pin_down_extent. This is the final step before switching pinned extent tracking to a per-transaction basis. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6ea01d76659..425eefeaacf8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2585,7 +2585,8 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start) return bytenr; } -static int pin_down_extent(struct btrfs_block_group *cache, +static int pin_down_extent(struct btrfs_trans_handle *trans, + struct btrfs_block_group *cache, u64 bytenr, u64 num_bytes, int reserved) { struct btrfs_fs_info *fs_info = cache->fs_info; @@ -2617,7 +2618,7 @@ int btrfs_pin_extent(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(trans->fs_info, bytenr); BUG_ON(!cache); /* Logic error */ - pin_down_extent(cache, bytenr, num_bytes, reserved); + pin_down_extent(trans, cache, bytenr, num_bytes, reserved); btrfs_put_block_group(cache); return 0; @@ -2644,7 +2645,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, */ btrfs_cache_block_group(cache, 1); - pin_down_extent(cache, bytenr, num_bytes, 0); + pin_down_extent(trans, cache, bytenr, num_bytes, 0); /* remove us from the free space cache (if we're there at all) */ ret = btrfs_remove_free_space(cache, bytenr, num_bytes); @@ -3296,7 +3297,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(fs_info, buf->start); if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { - pin_down_extent(cache, buf->start, buf->len, 1); + pin_down_extent(trans, cache, buf->start, buf->len, 1); btrfs_put_block_group(cache); goto out; } @@ -4197,7 +4198,7 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, return -ENOSPC; } - ret = pin_down_extent(cache, start, len, 1); + ret = pin_down_extent(trans, cache, start, len, 1); btrfs_put_block_group(cache); return ret; } -- cgit From f2fb72983bdcf57adf7e272107408391ef3dbb2d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:16 +0200 Subject: btrfs: Mark pinned log extents as excluded In preparation to making pinned extents per-transaction ensure that log such extents are always excluded from caching. To achieve this in addition to marking them via btrfs_pin_extent_for_log_replay they also need to be marked with btrfs_add_excluded_extent to prevent log tree extent buffer being loaded by the free space caching thread. That's required since log tree blocks are not recorded in the extent tree, hence they always look free. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 425eefeaacf8..f97e631aaca5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2633,6 +2633,8 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; int ret; + btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes); + cache = btrfs_lookup_block_group(trans->fs_info, bytenr); if (!cache) return -EINVAL; @@ -2919,6 +2921,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { + clear_extent_bits(&fs_info->freed_extents[0], start, + end, EXTENT_UPTODATE); + clear_extent_bits(&fs_info->freed_extents[1], start, + end, EXTENT_UPTODATE); + } if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, -- cgit From fe119a6eeb670585e29dbe3932e00ad29ae8f5f9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:18 +0200 Subject: btrfs: switch to per-transaction pinned extents This commit flips the switch to start tracking/processing pinned extents on a per-transaction basis. It mostly replaces all references from btrfs_fs_info::(pinned_extents|freed_extents[]) to btrfs_transaction::pinned_extents. Two notable modifications that warrant explicit mention are changing clean_pinned_extents to get a reference to the previously running transaction. The other one is removal of call to btrfs_destroy_pinned_extent since transactions are going to be cleaned in btrfs_cleanup_one_transaction. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 38 +++++++++++++++++++++++++------------- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/disk-io.c | 30 +++++------------------------- fs/btrfs/extent-io-tree.h | 4 ++-- fs/btrfs/extent-tree.c | 31 ++++++++----------------------- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/transaction.c | 2 ++ fs/btrfs/transaction.h | 1 + include/trace/events/btrfs.h | 4 ++-- 9 files changed, 48 insertions(+), 68 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9fec78a8c759..b8f39a679064 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end int ret; while (start < end) { - ret = find_first_extent_bit(info->pinned_extents, start, + ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, EXTENT_DIRTY | EXTENT_UPTODATE, NULL); @@ -1248,30 +1248,42 @@ out: return ret; } -static bool clean_pinned_extents(struct btrfs_block_group *bg) +static bool clean_pinned_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; + struct btrfs_transaction *prev_trans = NULL; const u64 start = bg->start; const u64 end = start + bg->length - 1; int ret; + spin_lock(&fs_info->trans_lock); + if (trans->transaction->list.prev != &fs_info->trans_list) { + prev_trans = list_last_entry(&trans->transaction->list, + struct btrfs_transaction, list); + refcount_inc(&prev_trans->use_count); + } + spin_unlock(&fs_info->trans_lock); + /* * Hold the unused_bg_unpin_mutex lock to avoid racing with * btrfs_finish_extent_commit(). If we are at transaction N, another * task might be running finish_extent_commit() for the previous * transaction N - 1, and have seen a range belonging to the block - * group in freed_extents[] before we were able to clear the whole - * block group range from freed_extents[]. This means that task can - * lookup for the block group after we unpinned it from freed_extents - * and removed it, leading to a BUG_ON() at unpin_extent_range(). + * group in pinned_extents before we were able to clear the whole block + * group range from pinned_extents. This means that task can lookup for + * the block group after we unpinned it from pinned_extents and removed + * it, leading to a BUG_ON() at unpin_extent_range(). */ mutex_lock(&fs_info->unused_bg_unpin_mutex); - ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, - EXTENT_DIRTY); - if (ret) - goto err; + if (prev_trans) { + ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY); + if (ret) + goto err; + } - ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, + ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, EXTENT_DIRTY); if (ret) goto err; @@ -1380,7 +1392,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * We could have pending pinned extents for this block group, * just delete them, we don't care about them anymore. */ - if (!clean_pinned_extents(block_group)) + if (!clean_pinned_extents(trans, block_group)) goto end_trans; /* @@ -2890,7 +2902,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, &cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(info->pinned_extents, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 22d0cb0019d1..bb237d577725 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,8 +596,8 @@ struct btrfs_fs_info { /* keep track of unallocated space */ atomic64_t free_chunk_space; - struct extent_io_tree freed_extents[2]; - struct extent_io_tree *pinned_extents; + /* Track ranges which are used by log trees blocks/logged data extents */ + struct extent_io_tree excluded_extents; /* logical->physical extent mapping */ struct extent_map_tree mapping_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 194c98a61095..e1e111c8b08b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2075,10 +2075,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) btrfs_free_log_root_tree(NULL, fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); - } } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) @@ -2749,11 +2747,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(fs_info, &fs_info->freed_extents[0], - IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); - extent_io_tree_init(fs_info, &fs_info->freed_extents[1], - IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); - fs_info->pinned_extents = &fs_info->freed_extents[0]; + extent_io_tree_init(fs_info, &fs_info->excluded_extents, + IO_TREE_FS_EXCLUDED_EXTENTS, NULL); set_bit(BTRFS_FS_BARRIER, &fs_info->flags); mutex_init(&fs_info->ordered_operations_mutex); @@ -4434,16 +4429,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, } static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, - struct extent_io_tree *pinned_extents) + struct extent_io_tree *unpin) { - struct extent_io_tree *unpin; u64 start; u64 end; int ret; - bool loop = true; - unpin = pinned_extents; -again: while (1) { struct extent_state *cached_state = NULL; @@ -4468,15 +4459,6 @@ again: cond_resched(); } - if (loop) { - if (unpin == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; - loop = false; - goto again; - } - return 0; } @@ -4567,8 +4549,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); - btrfs_destroy_pinned_extent(fs_info, - fs_info->pinned_extents); + btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); @@ -4620,7 +4601,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) btrfs_destroy_all_ordered_extents(fs_info); btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); btrfs_destroy_all_delalloc_inodes(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index cc3037f9765e..b4a7bad3e82e 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -36,8 +36,8 @@ struct io_failure_record; #define CHUNK_TRIMMED EXTENT_DEFRAG enum { - IO_TREE_FS_INFO_FREED_EXTENTS0, - IO_TREE_FS_INFO_FREED_EXTENTS1, + IO_TREE_FS_PINNED_EXTENTS, + IO_TREE_FS_EXCLUDED_EXTENTS, IO_TREE_INODE_IO, IO_TREE_INODE_IO_FAILURE, IO_TREE_RELOC_BLOCKS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f97e631aaca5..136fffb76428 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -64,10 +64,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, u64 start, u64 num_bytes) { u64 end = start + num_bytes - 1; - set_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - set_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + set_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); return 0; } @@ -79,10 +77,8 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache) start = cache->start; end = start + cache->length - 1; - clear_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + clear_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); } static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) @@ -2605,7 +2601,7 @@ static int pin_down_extent(struct btrfs_trans_handle *trans, percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(fs_info->pinned_extents, bytenr, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); return 0; } @@ -2761,11 +2757,6 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) } } - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - fs_info->pinned_extents = &fs_info->freed_extents[1]; - else - fs_info->pinned_extents = &fs_info->freed_extents[0]; - up_write(&fs_info->commit_root_sem); btrfs_update_global_block_rsv(fs_info); @@ -2906,10 +2897,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) u64 end; int ret; - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; + unpin = &trans->transaction->pinned_extents; while (!TRANS_ABORTED(trans)) { struct extent_state *cached_state = NULL; @@ -2921,12 +2909,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { - clear_extent_bits(&fs_info->freed_extents[0], start, + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) + clear_extent_bits(&fs_info->excluded_extents, start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], start, - end, EXTENT_UPTODATE); - } if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9d6372139547..bd9c4b5da549 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1086,7 +1086,7 @@ static noinline_for_stack int write_pinned_extent_entries( * We shouldn't have switched the pinned extents yet so this is the * right one */ - unpin = block_group->fs_info->pinned_extents; + unpin = &trans->transaction->pinned_extents; start = block_group->start; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 37680351b7c3..fdfdfc426539 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -336,6 +336,8 @@ loop: list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); + extent_io_tree_init(fs_info, &cur_trans->pinned_extents, + IO_TREE_FS_PINNED_EXTENTS, NULL); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 453cea7c7a72..31ae8d273065 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -71,6 +71,7 @@ struct btrfs_transaction { */ struct list_head io_bgs; struct list_head dropped_roots; + struct extent_io_tree pinned_extents; /* * we need to make sure block group deletion doesn't race with diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f1f2b6a04052..bcbc763b8814 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -81,8 +81,8 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); #define show_extent_io_tree_owner(owner) \ __print_symbolic(owner, \ - { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" }, \ - { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" }, \ + { IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \ + { IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \ { IO_TREE_INODE_IO, "INODE_IO" }, \ { IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \ { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ -- cgit From ab9b2c7b32e6be53cac2e23f5b2db66815a7d972 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:30 -0500 Subject: btrfs: handle logged extent failure properly If we're allocating a logged extent we attempt to insert an extent record for the file extent directly. We increase space_info->bytes_reserved, because the extent entry addition will call btrfs_update_block_group(), which will convert the ->bytes_reserved to ->bytes_used. However if we fail at any point while inserting the extent entry we will bail and leave space on ->bytes_reserved, which will trigger a WARN_ON() on umount. Fix this by pinning the space if we fail to insert, which is what happens in every other failure case that involves adding the extent entry. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Johannes Thumshirn Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 136fffb76428..7eef91d6c2b6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4422,7 +4422,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, offset, ins, 1); if (ret) - btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1); + btrfs_pin_extent(trans, ins->objectid, ins->offset, 1); btrfs_put_block_group(block_group); return ret; } -- cgit From dcc3eb9638c3c927f1597075e851d0a16300a876 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 30 Jan 2020 14:59:45 +0200 Subject: btrfs: convert snapshot/nocow exlcusion to drew lock This patch removes all haphazard code implementing nocow writers exclusion from pending snapshot creation and switches to using the drew lock to ensure this invariant still holds. 'Readers' are snapshot creators from create_snapshot and 'writers' are nocow writers from buffered write path or btrfs_setsize. This locking scheme allows for multiple snapshots to happen while any nocow writers are blocked, since writes to page cache in the nocow path will make snapshots inconsistent. So for performance reasons we'd like to have the ability to run multiple concurrent snapshots and also favors readers in this case. And in case there aren't pending snapshots (which will be the majority of the cases) we rely on the percpu's writers counter to avoid cacheline contention. The main gain from using the drew lock is it's now a lot easier to reason about the guarantees of the locking scheme and whether there is some silent breakage lurking. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 10 +++------- fs/btrfs/disk-io.c | 45 ++++++++++----------------------------------- fs/btrfs/extent-tree.c | 44 -------------------------------------------- fs/btrfs/file.c | 11 +++++------ fs/btrfs/inode.c | 8 ++++---- fs/btrfs/ioctl.c | 10 +++------- 6 files changed, 25 insertions(+), 103 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ab8151247b93..db9e872bcc79 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -957,11 +957,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } -struct btrfs_subvolume_writers { - struct percpu_counter counter; - wait_queue_head_t wait; -}; - /* * The state of btrfs root */ @@ -1133,8 +1128,9 @@ struct btrfs_root { * root_item_lock. */ int dedupe_in_progress; - struct btrfs_subvolume_writers *subv_writers; - atomic_t will_be_snapshotted; + /* For exclusion of snapshot creation and nocow writes */ + struct btrfs_drew_lock snapshot_lock; + atomic_t snapshot_force_cow; /* For qgroup metadata reserved space */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 770d469e1d9c..06819c41e4f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1104,32 +1104,6 @@ void btrfs_clean_tree_block(struct extent_buffer *buf) } } -static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) -{ - struct btrfs_subvolume_writers *writers; - int ret; - - writers = kmalloc(sizeof(*writers), GFP_NOFS); - if (!writers) - return ERR_PTR(-ENOMEM); - - ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS); - if (ret < 0) { - kfree(writers); - return ERR_PTR(ret); - } - - init_waitqueue_head(&writers->wait); - return writers; -} - -static void -btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) -{ - percpu_counter_destroy(&writers->counter); - kfree(writers); -} - static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -1178,7 +1152,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); refcount_set(&root->refs, 1); - atomic_set(&root->will_be_snapshotted, 0); atomic_set(&root->snapshot_force_cow, 0); atomic_set(&root->nr_swapfiles, 0); root->log_transid = 0; @@ -1450,7 +1423,7 @@ alloc_fail: static int btrfs_init_fs_root(struct btrfs_root *root) { int ret; - struct btrfs_subvolume_writers *writers; + unsigned int nofs_flag; root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), @@ -1460,12 +1433,15 @@ static int btrfs_init_fs_root(struct btrfs_root *root) goto fail; } - writers = btrfs_alloc_subvolume_writers(); - if (IS_ERR(writers)) { - ret = PTR_ERR(writers); + /* + * We might be called under a transaction (e.g. indirect backref + * resolution) which could deadlock if it triggers memory reclaim + */ + nofs_flag = memalloc_nofs_save(); + ret = btrfs_drew_lock_init(&root->snapshot_lock); + memalloc_nofs_restore(nofs_flag); + if (ret) goto fail; - } - root->subv_writers = writers; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { set_bit(BTRFS_ROOT_REF_COWS, &root->state); @@ -3961,8 +3937,7 @@ void btrfs_free_fs_root(struct btrfs_root *root) WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_dev) free_anon_bdev(root->anon_dev); - if (root->subv_writers) - btrfs_free_subvolume_writers(root->subv_writers); + btrfs_drew_lock_destroy(&root->snapshot_lock); free_extent_buffer(root->node); free_extent_buffer(root->commit_root); kfree(root->free_ino_ctl); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7eef91d6c2b6..9dcd70cc3ca3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5740,47 +5740,3 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) return bg_ret; return dev_ret; } - -/* - * btrfs_{start,end}_write_no_snapshotting() are similar to - * mnt_{want,drop}_write(), they are used to prevent some tasks from writing - * data into the page cache through nocow before the subvolume is snapshoted, - * but flush the data into disk after the snapshot creation, or to prevent - * operations while snapshotting is ongoing and that cause the snapshot to be - * inconsistent (writes followed by expanding truncates for example). - */ -void btrfs_end_write_no_snapshotting(struct btrfs_root *root) -{ - percpu_counter_dec(&root->subv_writers->counter); - cond_wake_up(&root->subv_writers->wait); -} - -int btrfs_start_write_no_snapshotting(struct btrfs_root *root) -{ - if (atomic_read(&root->will_be_snapshotted)) - return 0; - - percpu_counter_inc(&root->subv_writers->counter); - /* - * Make sure counter is updated before we check for snapshot creation. - */ - smp_mb(); - if (atomic_read(&root->will_be_snapshotted)) { - btrfs_end_write_no_snapshotting(root); - return 0; - } - return 1; -} - -void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) -{ - while (true) { - int ret; - - ret = btrfs_start_write_no_snapshotting(root); - if (ret) - break; - wait_var_event(&root->will_be_snapshotted, - !atomic_read(&root->will_be_snapshotted)); - } -} diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fd52ad00b6c8..8a974a82be51 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1553,8 +1553,7 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; - ret = btrfs_start_write_no_snapshotting(root); - if (!ret) + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); @@ -1569,7 +1568,7 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, NULL, NULL, NULL); if (ret <= 0) { ret = 0; - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); @@ -1675,7 +1674,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, data_reserved, pos, write_bytes); else - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); break; } @@ -1779,7 +1778,7 @@ again: release_bytes = 0; if (only_release_metadata) - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); if (only_release_metadata && copied > 0) { lockstart = round_down(pos, @@ -1808,7 +1807,7 @@ again: if (release_bytes) { if (only_release_metadata) { - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 254f5ea17e40..1e138c83cc6e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4726,16 +4726,16 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * truncation, it must capture all writes that happened before * this truncation. */ - btrfs_wait_for_snapshot_creation(root); + btrfs_drew_write_lock(&root->snapshot_lock); ret = btrfs_cont_expand(inode, oldsize, newsize); if (ret) { - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); return ret; } trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); return PTR_ERR(trans); } @@ -4743,7 +4743,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) btrfs_inode_safe_disk_i_size_write(inode, 0); pagecache_isize_extended(inode, oldsize, newsize); ret = btrfs_update_inode(trans, root, inode); - btrfs_end_write_no_snapshotting(root); + btrfs_drew_write_unlock(&root->snapshot_lock); btrfs_end_transaction(trans); } else { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 56bd3ea7fb67..6ded5e346821 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -791,11 +791,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * possible. This is to avoid later writeback (running dealloc) to * fallback to COW mode and unexpectedly fail with ENOSPC. */ - atomic_inc(&root->will_be_snapshotted); - smp_mb__after_atomic(); - /* wait for no snapshot writes */ - wait_event(root->subv_writers->wait, - percpu_counter_sum(&root->subv_writers->counter) == 0); + btrfs_drew_read_lock(&root->snapshot_lock); ret = btrfs_start_delalloc_snapshot(root); if (ret) @@ -876,8 +872,8 @@ fail: dec_and_free: if (snapshot_force_cow) atomic_dec(&root->snapshot_force_cow); - if (atomic_dec_and_test(&root->will_be_snapshotted)) - wake_up_var(&root->will_be_snapshotted); + btrfs_drew_read_unlock(&root->snapshot_lock); + free_pending: kfree(pending_snapshot->root_item); btrfs_free_path(pending_snapshot->path); -- cgit From 42c9d0b524cf9af180dcb788a938cdc4c678e8cb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Mar 2019 11:54:13 +0100 Subject: btrfs: simplify parameters of btrfs_set_disk_extent_flags All callers pass extent buffer start and length so the extent buffer itself should work fine. Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 4 +--- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 7 +++---- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b62721ac5ee8..f948435e87df 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -925,9 +925,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (new_flags != 0) { int level = btrfs_header_level(buf); - ret = btrfs_set_disk_extent_flags(trans, - buf->start, - buf->len, + ret = btrfs_set_disk_extent_flags(trans, buf, new_flags, level, 0); if (ret) return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1cde3f1d8f20..ea5d0675465a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2482,7 +2482,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref); int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 flags, + struct extent_buffer *eb, u64 flags, int level, int is_data); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9dcd70cc3ca3..161274118853 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2231,7 +2231,7 @@ out: } int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 flags, + struct extent_buffer *eb, u64 flags, int level, int is_data) { struct btrfs_delayed_extent_op *extent_op; @@ -2247,7 +2247,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, extent_op->is_data = is_data ? true : false; extent_op->level = level; - ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); + ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op); if (ret) btrfs_free_delayed_extent_op(extent_op); return ret; @@ -4741,8 +4741,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, BUG_ON(ret); /* -ENOMEM */ ret = btrfs_dec_ref(trans, root, eb, 0); BUG_ON(ret); /* -ENOMEM */ - ret = btrfs_set_disk_extent_flags(trans, eb->start, - eb->len, flag, + ret = btrfs_set_disk_extent_flags(trans, eb, flag, btrfs_header_level(eb), 0); BUG_ON(ret); /* -ENOMEM */ wc->flags[level] |= flag; -- cgit From 52d40aba68dcc42ba3f2846eedf9cb1c6d5ce45f Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:06 +0900 Subject: btrfs: change full_search to bool in find_free_extent_update_loop While the "full_search" variable defined in find_free_extent() is bool, but the full_search argument of find_free_extent_update_loop() is defined as int. Let's trivially fix the argument type. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 161274118853..9d5563a63a1f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3661,7 +3661,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, struct btrfs_free_cluster *last_ptr, struct btrfs_key *ins, struct find_free_extent_ctl *ffe_ctl, - int full_search, bool use_cluster) + bool full_search, bool use_cluster) { struct btrfs_root *root = fs_info->extent_root; int ret; -- cgit From cb2f96f8ab68812fa3e6f32ec763fb15ae6395c6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:16 +0900 Subject: btrfs: introduce extent allocation policy This commit introduces extent allocation policy for btrfs. This policy controls how btrfs allocate an extents from block groups. There is no functional change introduced with this commit. Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9d5563a63a1f..bd0d13661ea7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3427,6 +3427,10 @@ btrfs_release_block_group(struct btrfs_block_group *cache, btrfs_put_block_group(cache); } +enum btrfs_extent_allocation_policy { + BTRFS_EXTENT_ALLOC_CLUSTERED, +}; + /* * Structure used internally for find_free_extent() function. Wraps needed * parameters. @@ -3478,6 +3482,9 @@ struct find_free_extent_ctl { /* Found result */ u64 found_offset; + + /* Allocation policy */ + enum btrfs_extent_allocation_policy policy; }; @@ -3815,6 +3822,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, ffe_ctl.have_caching_bg = false; ffe_ctl.orig_have_caching_bg = false; ffe_ctl.found_offset = 0; + ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; ins->type = BTRFS_EXTENT_ITEM_KEY; ins->objectid = 0; -- cgit From ea544149a49f3aad4b6f164507608e59c12bbc11 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:17 +0900 Subject: btrfs: move hint_byte into find_free_extent_ctl This commit moves hint_byte into find_free_extent_ctl, so that we can modify the hint_byte in the other functions. This will help us split find_free_extent further. This commit also renames the function argument "hint_byte" to "hint_byte_orig" to avoid misuse. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bd0d13661ea7..4f68291d03ea 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3483,6 +3483,9 @@ struct find_free_extent_ctl { /* Found result */ u64 found_offset; + /* Hint where to start looking for an empty space */ + u64 hint_byte; + /* Allocation policy */ enum btrfs_extent_allocation_policy policy; }; @@ -3797,7 +3800,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, */ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, u64 ram_bytes, u64 num_bytes, u64 empty_size, - u64 hint_byte, struct btrfs_key *ins, + u64 hint_byte_orig, struct btrfs_key *ins, u64 flags, int delalloc) { int ret = 0; @@ -3822,6 +3825,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, ffe_ctl.have_caching_bg = false; ffe_ctl.orig_have_caching_bg = false; ffe_ctl.found_offset = 0; + ffe_ctl.hint_byte = hint_byte_orig; ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; ins->type = BTRFS_EXTENT_ITEM_KEY; @@ -3864,14 +3868,14 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, if (last_ptr) { spin_lock(&last_ptr->lock); if (last_ptr->block_group) - hint_byte = last_ptr->window_start; + ffe_ctl.hint_byte = last_ptr->window_start; if (last_ptr->fragmented) { /* * We still set window_start so we can keep track of the * last place we found an allocation to try and save * some time. */ - hint_byte = last_ptr->window_start; + ffe_ctl.hint_byte = last_ptr->window_start; use_cluster = false; } spin_unlock(&last_ptr->lock); @@ -3879,8 +3883,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, ffe_ctl.search_start = max(ffe_ctl.search_start, first_logical_byte(fs_info, 0)); - ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte); - if (ffe_ctl.search_start == hint_byte) { + ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte); + if (ffe_ctl.search_start == ffe_ctl.hint_byte) { block_group = btrfs_lookup_block_group(fs_info, ffe_ctl.search_start); /* -- cgit From c10859be9b96815e28637e1e0fbd950802929f4f Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:18 +0900 Subject: btrfs: move variables for clustered allocation into find_free_extent_ctl Move "last_ptr" and "use_cluster" into struct find_free_extent_ctl, so that hook functions for clustered allocator can use these variables. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4f68291d03ea..0d6e2529a75b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3447,6 +3447,8 @@ struct find_free_extent_ctl { /* For clustered allocation */ u64 empty_cluster; + struct btrfs_free_cluster *last_ptr; + bool use_cluster; bool have_caching_bg; bool orig_have_caching_bg; @@ -3805,11 +3807,9 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, { int ret = 0; int cache_block_group_error = 0; - struct btrfs_free_cluster *last_ptr = NULL; struct btrfs_block_group *block_group = NULL; struct find_free_extent_ctl ffe_ctl = {0}; struct btrfs_space_info *space_info; - bool use_cluster = true; bool full_search = false; WARN_ON(num_bytes < fs_info->sectorsize); @@ -3818,8 +3818,6 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, ffe_ctl.empty_size = empty_size; ffe_ctl.flags = flags; ffe_ctl.search_start = 0; - ffe_ctl.retry_clustered = false; - ffe_ctl.retry_unclustered = false; ffe_ctl.delalloc = delalloc; ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags); ffe_ctl.have_caching_bg = false; @@ -3828,6 +3826,12 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, ffe_ctl.hint_byte = hint_byte_orig; ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; + /* For clustered allocation */ + ffe_ctl.retry_clustered = false; + ffe_ctl.retry_unclustered = false; + ffe_ctl.last_ptr = NULL; + ffe_ctl.use_cluster = true; + ins->type = BTRFS_EXTENT_ITEM_KEY; ins->objectid = 0; ins->offset = 0; @@ -3858,14 +3862,16 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, spin_unlock(&space_info->lock); return -ENOSPC; } else if (space_info->max_extent_size) { - use_cluster = false; + ffe_ctl.use_cluster = false; } spin_unlock(&space_info->lock); } - last_ptr = fetch_cluster_info(fs_info, space_info, - &ffe_ctl.empty_cluster); - if (last_ptr) { + ffe_ctl.last_ptr = fetch_cluster_info(fs_info, space_info, + &ffe_ctl.empty_cluster); + if (ffe_ctl.last_ptr) { + struct btrfs_free_cluster *last_ptr = ffe_ctl.last_ptr; + spin_lock(&last_ptr->lock); if (last_ptr->block_group) ffe_ctl.hint_byte = last_ptr->window_start; @@ -3876,7 +3882,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, * some time. */ ffe_ctl.hint_byte = last_ptr->window_start; - use_cluster = false; + ffe_ctl.use_cluster = false; } spin_unlock(&last_ptr->lock); } @@ -3989,10 +3995,11 @@ have_block_group: * Ok we want to try and use the cluster allocator, so * lets look there */ - if (last_ptr && use_cluster) { + if (ffe_ctl.last_ptr && ffe_ctl.use_cluster) { struct btrfs_block_group *cluster_bg = NULL; - ret = find_free_extent_clustered(block_group, last_ptr, + ret = find_free_extent_clustered(block_group, + ffe_ctl.last_ptr, &ffe_ctl, &cluster_bg); if (ret == 0) { @@ -4010,8 +4017,8 @@ have_block_group: /* ret == -ENOENT case falls through */ } - ret = find_free_extent_unclustered(block_group, last_ptr, - &ffe_ctl); + ret = find_free_extent_unclustered(block_group, + ffe_ctl.last_ptr, &ffe_ctl); if (ret == -EAGAIN) goto have_block_group; else if (ret > 0) @@ -4060,8 +4067,9 @@ loop: } up_read(&space_info->groups_sem); - ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl, - full_search, use_cluster); + ret = find_free_extent_update_loop(fs_info, ffe_ctl.last_ptr, ins, + &ffe_ctl, full_search, + ffe_ctl.use_cluster); if (ret > 0) goto search; -- cgit From c668690dc035e9a8597a7e60feeb85d1b1e581e6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:19 +0900 Subject: btrfs: factor out do_allocation() for extent allocation Factor out do_allocation() from find_free_extent(). This function do an actual extent allocation in a given block group. The ffe_ctl->policy is used to determine the actual allocator function to use. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 75 +++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0d6e2529a75b..49a682038d1b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3664,6 +3664,37 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg, return 0; } +static int do_allocation_clustered(struct btrfs_block_group *block_group, + struct find_free_extent_ctl *ffe_ctl, + struct btrfs_block_group **bg_ret) +{ + int ret; + + /* We want to try and use the cluster allocator, so lets look there */ + if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) { + ret = find_free_extent_clustered(block_group, ffe_ctl->last_ptr, + ffe_ctl, bg_ret); + if (ret >= 0 || ret == -EAGAIN) + return ret; + /* ret == -ENOENT case falls through */ + } + + return find_free_extent_unclustered(block_group, ffe_ctl->last_ptr, + ffe_ctl); +} + +static int do_allocation(struct btrfs_block_group *block_group, + struct find_free_extent_ctl *ffe_ctl, + struct btrfs_block_group **bg_ret) +{ + switch (ffe_ctl->policy) { + case BTRFS_EXTENT_ALLOC_CLUSTERED: + return do_allocation_clustered(block_group, ffe_ctl, bg_ret); + default: + BUG(); + } +} + /* * Return >0 means caller needs to re-search for free extent * Return 0 means we have the needed free extent. @@ -3931,6 +3962,8 @@ search: down_read(&space_info->groups_sem); list_for_each_entry(block_group, &space_info->block_groups[ffe_ctl.index], list) { + struct btrfs_block_group *bg_ret; + /* If the block group is read-only, we can skip it entirely. */ if (unlikely(block_group->ro)) continue; @@ -3991,40 +4024,20 @@ have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) goto loop; - /* - * Ok we want to try and use the cluster allocator, so - * lets look there - */ - if (ffe_ctl.last_ptr && ffe_ctl.use_cluster) { - struct btrfs_block_group *cluster_bg = NULL; - - ret = find_free_extent_clustered(block_group, - ffe_ctl.last_ptr, - &ffe_ctl, &cluster_bg); - - if (ret == 0) { - if (cluster_bg && cluster_bg != block_group) { - btrfs_release_block_group(block_group, - delalloc); - block_group = cluster_bg; - } - goto checks; - } else if (ret == -EAGAIN) { - goto have_block_group; - } else if (ret > 0) { - goto loop; + bg_ret = NULL; + ret = do_allocation(block_group, &ffe_ctl, &bg_ret); + if (ret == 0) { + if (bg_ret && bg_ret != block_group) { + btrfs_release_block_group(block_group, delalloc); + block_group = bg_ret; } - /* ret == -ENOENT case falls through */ - } - - ret = find_free_extent_unclustered(block_group, - ffe_ctl.last_ptr, &ffe_ctl); - if (ret == -EAGAIN) + } else if (ret == -EAGAIN) { goto have_block_group; - else if (ret > 0) + } else if (ret > 0) { goto loop; - /* ret == 0 case falls through */ -checks: + } + + /* Checks */ ffe_ctl.search_start = round_up(ffe_ctl.found_offset, fs_info->stripesize); -- cgit From 897cae7948cb06c2f98de3c6f1833e7d8b3daa02 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:20 +0900 Subject: btrfs: drop unnecessary arguments from clustered allocation functions Now that, find_free_extent_clustered() and find_free_extent_unclustered() can access "last_ptr" from the "clustered" variable, we can drop it from the arguments. Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49a682038d1b..93d5b5c48185 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3502,11 +3502,11 @@ struct find_free_extent_ctl { * Return 0 means we have found a location and set ffe_ctl->found_offset. */ static int find_free_extent_clustered(struct btrfs_block_group *bg, - struct btrfs_free_cluster *last_ptr, - struct find_free_extent_ctl *ffe_ctl, - struct btrfs_block_group **cluster_bg_ret) + struct find_free_extent_ctl *ffe_ctl, + struct btrfs_block_group **cluster_bg_ret) { struct btrfs_block_group *cluster_bg; + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; u64 aligned_cluster; u64 offset; int ret; @@ -3606,9 +3606,9 @@ refill_cluster: * Return -EAGAIN to inform caller that we need to re-search this block group */ static int find_free_extent_unclustered(struct btrfs_block_group *bg, - struct btrfs_free_cluster *last_ptr, - struct find_free_extent_ctl *ffe_ctl) + struct find_free_extent_ctl *ffe_ctl) { + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; u64 offset; /* @@ -3672,15 +3672,13 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group, /* We want to try and use the cluster allocator, so lets look there */ if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) { - ret = find_free_extent_clustered(block_group, ffe_ctl->last_ptr, - ffe_ctl, bg_ret); + ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret); if (ret >= 0 || ret == -EAGAIN) return ret; /* ret == -ENOENT case falls through */ } - return find_free_extent_unclustered(block_group, ffe_ctl->last_ptr, - ffe_ctl); + return find_free_extent_unclustered(block_group, ffe_ctl); } static int do_allocation(struct btrfs_block_group *block_group, -- cgit From baba50624fe57320837cf2aa566769385872d2af Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:21 +0900 Subject: btrfs: factor out release_block_group() Factor out release_block_group() from find_free_extent(). This function is called when it gives up an allocation from a block group. Each allocation policy should reset its information for an allocation in the next block group. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 93d5b5c48185..6d4f53e92833 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3693,6 +3693,24 @@ static int do_allocation(struct btrfs_block_group *block_group, } } +static void release_block_group(struct btrfs_block_group *block_group, + struct find_free_extent_ctl *ffe_ctl, + int delalloc) +{ + switch (ffe_ctl->policy) { + case BTRFS_EXTENT_ALLOC_CLUSTERED: + ffe_ctl->retry_clustered = false; + ffe_ctl->retry_unclustered = false; + break; + default: + BUG(); + } + + BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != + ffe_ctl->index); + btrfs_release_block_group(block_group, delalloc); +} + /* * Return >0 means caller needs to re-search for free extent * Return 0 means we have the needed free extent. @@ -4069,11 +4087,7 @@ have_block_group: btrfs_release_block_group(block_group, delalloc); break; loop: - ffe_ctl.retry_clustered = false; - ffe_ctl.retry_unclustered = false; - BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != - ffe_ctl.index); - btrfs_release_block_group(block_group, delalloc); + release_block_group(block_group, &ffe_ctl, delalloc); cond_resched(); } up_read(&space_info->groups_sem); -- cgit From 0ab9724bf5f320f79d6f7e214f4fd1f95449a917 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:22 +0900 Subject: btrfs: factor out found_extent() for extent allocation Factor out found_extent() from find_free_extent_update_loop(). This function is called when a proper extent is found and before returning from find_free_extent(). Hook functions like found_extent_clustered() should save information for a next allocation. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6d4f53e92833..dfa88a795c3f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3711,6 +3711,30 @@ static void release_block_group(struct btrfs_block_group *block_group, btrfs_release_block_group(block_group, delalloc); } +static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl, + struct btrfs_key *ins) +{ + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; + + if (!ffe_ctl->use_cluster && last_ptr) { + spin_lock(&last_ptr->lock); + last_ptr->window_start = ins->objectid; + spin_unlock(&last_ptr->lock); + } +} + +static void found_extent(struct find_free_extent_ctl *ffe_ctl, + struct btrfs_key *ins) +{ + switch (ffe_ctl->policy) { + case BTRFS_EXTENT_ALLOC_CLUSTERED: + found_extent_clustered(ffe_ctl, ins); + break; + default: + BUG(); + } +} + /* * Return >0 means caller needs to re-search for free extent * Return 0 means we have the needed free extent. @@ -3737,11 +3761,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, return 1; if (ins->objectid) { - if (!use_cluster && last_ptr) { - spin_lock(&last_ptr->lock); - last_ptr->window_start = ins->objectid; - spin_unlock(&last_ptr->lock); - } + found_extent(ffe_ctl, ins); return 0; } -- cgit From 15b7ee6584c66f21f0a9aa7f7a6f3bfda5c5ad34 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:23 +0900 Subject: btrfs: drop unnecessary arguments from find_free_extent_update_loop() Now that, we don't use last_ptr and use_cluster in the function. Drop these arguments from it. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dfa88a795c3f..054fc6c7be84 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3741,10 +3741,9 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl, * Return <0 means we failed to locate any free extent. */ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, - struct btrfs_free_cluster *last_ptr, struct btrfs_key *ins, struct find_free_extent_ctl *ffe_ctl, - bool full_search, bool use_cluster) + bool full_search) { struct btrfs_root *root = fs_info->extent_root; int ret; @@ -4112,9 +4111,7 @@ loop: } up_read(&space_info->groups_sem); - ret = find_free_extent_update_loop(fs_info, ffe_ctl.last_ptr, ins, - &ffe_ctl, full_search, - ffe_ctl.use_cluster); + ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search); if (ret > 0) goto search; -- cgit From c70e2139dc81b35c7d86a5d550490f7eb1451c2e Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:24 +0900 Subject: btrfs: factor out chunk_allocation_failed() for extent allocation Factor out chunk_allocation_failed() from find_free_extent_update_loop(). This function is called when it failed to allocate a chunk. The function can modify "ffe_ctl->loop" and return 0 to continue with the next stage. Or, it can return -ENOSPC to give up here. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 054fc6c7be84..bcffb96d87c6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3735,6 +3735,21 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl, } } +static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl) +{ + switch (ffe_ctl->policy) { + case BTRFS_EXTENT_ALLOC_CLUSTERED: + /* + * If we can't allocate a new chunk we've already looped through + * at least once, move on to the NO_EMPTY_SIZE case. + */ + ffe_ctl->loop = LOOP_NO_EMPTY_SIZE; + return 0; + default: + BUG(); + } +} + /* * Return >0 means caller needs to re-search for free extent * Return 0 means we have the needed free extent. @@ -3806,16 +3821,10 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, CHUNK_ALLOC_FORCE); - /* - * If we can't allocate a new chunk we've already looped - * through at least once, move on to the NO_EMPTY_SIZE - * case. - */ - if (ret == -ENOSPC) - ffe_ctl->loop = LOOP_NO_EMPTY_SIZE; - /* Do not bail out on ENOSPC since we can do more. */ - if (ret < 0 && ret != -ENOSPC) + if (ret == -ENOSPC) + ret = chunk_allocation_failed(ffe_ctl); + else if (ret < 0) btrfs_abort_transaction(trans, ret); else ret = 0; -- cgit From 45d8e033b2354e74a3f80cffec0b0b1aa099f4c7 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:25 +0900 Subject: btrfs: skip LOOP_NO_EMPTY_SIZE if not clustered allocation LOOP_NO_EMPTY_SIZE is solely dedicated for clustered allocation. So, we can skip this stage and give up the allocation. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bcffb96d87c6..1d895df67b3b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3835,6 +3835,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, } if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) { + if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED) + return -ENOSPC; + /* * Don't loop again if we already have no empty_size and * no empty_cluster. -- cgit From 7e895409421534bc7a0d8d79a81be4533d70ba8f Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 25 Feb 2020 12:56:26 +0900 Subject: btrfs: factor out prepare_allocation() for extent allocation This function finally factor out prepare_allocation() form find_free_extent(). This function is called before the allocation loop and a specific allocator function like prepare_allocation_clustered() should initialize their private information and can set proper hint_byte to indicate where to start the allocation with. Reviewed-by: Josef Bacik Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 110 ++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 42 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d895df67b3b..36374b86529c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3853,6 +3853,71 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, return -ENOSPC; } +static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl, + struct btrfs_space_info *space_info, + struct btrfs_key *ins) +{ + /* + * If our free space is heavily fragmented we may not be able to make + * big contiguous allocations, so instead of doing the expensive search + * for free space, simply return ENOSPC with our max_extent_size so we + * can go ahead and search for a more manageable chunk. + * + * If our max_extent_size is large enough for our allocation simply + * disable clustering since we will likely not be able to find enough + * space to create a cluster and induce latency trying. + */ + if (space_info->max_extent_size) { + spin_lock(&space_info->lock); + if (space_info->max_extent_size && + ffe_ctl->num_bytes > space_info->max_extent_size) { + ins->offset = space_info->max_extent_size; + spin_unlock(&space_info->lock); + return -ENOSPC; + } else if (space_info->max_extent_size) { + ffe_ctl->use_cluster = false; + } + spin_unlock(&space_info->lock); + } + + ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info, + &ffe_ctl->empty_cluster); + if (ffe_ctl->last_ptr) { + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; + + spin_lock(&last_ptr->lock); + if (last_ptr->block_group) + ffe_ctl->hint_byte = last_ptr->window_start; + if (last_ptr->fragmented) { + /* + * We still set window_start so we can keep track of the + * last place we found an allocation to try and save + * some time. + */ + ffe_ctl->hint_byte = last_ptr->window_start; + ffe_ctl->use_cluster = false; + } + spin_unlock(&last_ptr->lock); + } + + return 0; +} + +static int prepare_allocation(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl, + struct btrfs_space_info *space_info, + struct btrfs_key *ins) +{ + switch (ffe_ctl->policy) { + case BTRFS_EXTENT_ALLOC_CLUSTERED: + return prepare_allocation_clustered(fs_info, ffe_ctl, + space_info, ins); + default: + BUG(); + } +} + /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: @@ -3922,48 +3987,9 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, return -ENOSPC; } - /* - * If our free space is heavily fragmented we may not be able to make - * big contiguous allocations, so instead of doing the expensive search - * for free space, simply return ENOSPC with our max_extent_size so we - * can go ahead and search for a more manageable chunk. - * - * If our max_extent_size is large enough for our allocation simply - * disable clustering since we will likely not be able to find enough - * space to create a cluster and induce latency trying. - */ - if (unlikely(space_info->max_extent_size)) { - spin_lock(&space_info->lock); - if (space_info->max_extent_size && - num_bytes > space_info->max_extent_size) { - ins->offset = space_info->max_extent_size; - spin_unlock(&space_info->lock); - return -ENOSPC; - } else if (space_info->max_extent_size) { - ffe_ctl.use_cluster = false; - } - spin_unlock(&space_info->lock); - } - - ffe_ctl.last_ptr = fetch_cluster_info(fs_info, space_info, - &ffe_ctl.empty_cluster); - if (ffe_ctl.last_ptr) { - struct btrfs_free_cluster *last_ptr = ffe_ctl.last_ptr; - - spin_lock(&last_ptr->lock); - if (last_ptr->block_group) - ffe_ctl.hint_byte = last_ptr->window_start; - if (last_ptr->fragmented) { - /* - * We still set window_start so we can keep track of the - * last place we found an allocation to try and save - * some time. - */ - ffe_ctl.hint_byte = last_ptr->window_start; - ffe_ctl.use_cluster = false; - } - spin_unlock(&last_ptr->lock); - } + ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins); + if (ret < 0) + return ret; ffe_ctl.search_start = max(ffe_ctl.search_start, first_logical_byte(fs_info, 0)); -- cgit From 65cd6d9e30fa85922cb073625665807130d01e70 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 4 Mar 2020 11:33:53 +0200 Subject: btrfs: Open code insert_extent_backref No need to add a level of indirection for hiding a simple 'if'. Open code insert_extent_backref in its sole caller. No functional changes. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 36374b86529c..a3778937d705 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1189,24 +1189,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static int insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_path *path, - u64 bytenr, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int refs_to_add) -{ - int ret; - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - BUG_ON(refs_to_add != 1); - ret = insert_tree_block_ref(trans, path, bytenr, parent, - root_objectid); - } else { - ret = insert_extent_data_ref(trans, path, bytenr, parent, - root_objectid, owner, offset, - refs_to_add); - } - return ret; -} - static int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, @@ -1493,8 +1475,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path->reada = READA_FORWARD; path->leave_spinning = 1; /* now insert the actual backref */ - ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid, - owner, offset, refs_to_add); + if (owner < BTRFS_FIRST_FREE_OBJECTID) { + BUG_ON(refs_to_add != 1); + ret = insert_tree_block_ref(trans, path, bytenr, parent, + root_objectid); + } else { + ret = insert_extent_data_ref(trans, path, bytenr, parent, + root_objectid, owner, offset, + refs_to_add); + } if (ret) btrfs_abort_transaction(trans, ret); out: -- cgit From 0078a9f941d2a994d756c330f225e888c31c768d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 10 Mar 2020 11:43:51 +0200 Subject: btrfs: Remove block_rsv parameter from btrfs_drop_snapshot It's no longer used following 30d40577e322 ("btrfs: reloc: Also queue orphan reloc tree for cleanup to avoid BUG_ON()"), so just remove it. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/extent-tree.c | 9 +-------- fs/btrfs/relocation.c | 4 ++-- fs/btrfs/transaction.c | 4 ++-- 4 files changed, 7 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e490cfd70bba..af7229eac02b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2657,9 +2657,8 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) return btrfs_next_old_item(root, p, 0); } int btrfs_leaf_free_space(struct extent_buffer *leaf); -int __must_check btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - int update_ref, int for_reloc); +int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, + int for_reloc); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a3778937d705..001605b672b8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5288,9 +5288,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * * If called with for_reloc == 0, may exit early with -EAGAIN */ -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref, - int for_reloc) +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; @@ -5329,9 +5327,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, if (err) goto out_end_trans; - if (block_rsv) - trans->block_rsv = block_rsv; - /* * This will help us catch people modifying the fs tree while we're * dropping it. It is unsafe to mess with the fs tree while it's being @@ -5459,8 +5454,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, err = PTR_ERR(trans); goto out_free; } - if (block_rsv) - trans->block_rsv = block_rsv; } } btrfs_release_path(path); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 782c9e90fa6f..5dfb7ddbb4ab 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2276,7 +2276,7 @@ static int clean_dirty_subvols(struct reloc_control *rc) root->reloc_root = NULL; if (reloc_root) { - ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1); + ret2 = btrfs_drop_snapshot(reloc_root, 0, 1); if (ret2 < 0 && !ret) ret = ret2; } @@ -2289,7 +2289,7 @@ static int clean_dirty_subvols(struct reloc_control *rc) btrfs_put_root(root); } else { /* Orphan reloc tree, just clean it up */ - ret2 = btrfs_drop_snapshot(root, NULL, 0, 1); + ret2 = btrfs_drop_snapshot(root, 0, 1); if (ret2 < 0 && !ret) ret = ret2; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 096c0aab34ee..5939bca9d5eb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2427,9 +2427,9 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV) - ret = btrfs_drop_snapshot(root, NULL, 0, 0); + ret = btrfs_drop_snapshot(root, 0, 0); else - ret = btrfs_drop_snapshot(root, NULL, 1, 0); + ret = btrfs_drop_snapshot(root, 1, 0); return (ret < 0) ? 0 : 1; } -- cgit From 8c38938c7bb096313ad00c2bafa82af37636b0ec Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 14 Feb 2020 16:11:42 -0500 Subject: btrfs: move the root freeing stuff into btrfs_put_root There are a few different ways to free roots, either you allocated them yourself and you just do free_extent_buffer(root->node); free_extent_buffer(root->commit_node); btrfs_put_root(root); Which is the pattern for log roots. Or for snapshots/subvolumes that are being dropped you simply call btrfs_free_fs_root() which does all the cleanup for you. Unify this all into btrfs_put_root(), so that we don't free up things associated with the root until the last reference is dropped. This makes the root freeing code much more significant. The only caveat is at close_ctree() time we have to free the extent buffers for all of our main roots (extent_root, chunk_root, etc) because we have to drop the btree_inode and we'll run into issues if we hold onto those nodes until ->kill_sb() time. This will be addressed in the future when we kill the btree_inode. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 63 ++++++++++++++++++++++---------------------- fs/btrfs/disk-io.h | 16 +---------- fs/btrfs/extent-tree.c | 7 ++--- fs/btrfs/extent_io.c | 16 ++++++++--- fs/btrfs/free-space-tree.c | 2 -- fs/btrfs/qgroup.c | 7 +---- fs/btrfs/relocation.c | 4 --- fs/btrfs/tests/btrfs-tests.c | 5 +--- fs/btrfs/tree-log.c | 6 ----- 9 files changed, 50 insertions(+), 76 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 37418d1d0820..242c072d69a7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1255,11 +1255,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, return root; fail: - if (leaf) { + if (leaf) btrfs_tree_unlock(leaf); - free_extent_buffer(root->commit_root); - free_extent_buffer(leaf); - } btrfs_put_root(root); return ERR_PTR(ret); @@ -1382,10 +1379,10 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, generation, level, NULL); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); + root->node = NULL; goto find_fail; } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { ret = -EIO; - free_extent_buffer(root->node); goto find_fail; } root->commit_root = btrfs_root_node(root); @@ -1617,14 +1614,14 @@ again: if (ret) { btrfs_put_root(root); if (ret == -EEXIST) { - btrfs_free_fs_root(root); + btrfs_put_root(root); goto again; } goto fail; } return root; fail: - btrfs_free_fs_root(root); + btrfs_put_root(root); return ERR_PTR(ret); } @@ -1996,11 +1993,35 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) free_root_extent_buffers(info->csum_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); + free_root_extent_buffers(info->fs_root); if (free_chunk_root) free_root_extent_buffers(info->chunk_root); free_root_extent_buffers(info->free_space_root); } +void btrfs_put_root(struct btrfs_root *root) +{ + if (!root) + return; + + if (refcount_dec_and_test(&root->refs)) { + WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + if (root->anon_dev) + free_anon_bdev(root->anon_dev); + btrfs_drew_lock_destroy(&root->snapshot_lock); + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root->free_ino_ctl); + kfree(root->free_ino_pinned); +#ifdef CONFIG_BTRFS_DEBUG + spin_lock(&root->fs_info->fs_roots_radix_lock); + list_del_init(&root->leak_list); + spin_unlock(&root->fs_info->fs_roots_radix_lock); +#endif + kfree(root); + } +} + void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) { int ret; @@ -2012,13 +2033,10 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root, root_list); list_del(&gang[0]->root_list); - if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) { + if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) btrfs_drop_and_free_fs_root(fs_info, gang[0]); - } else { - free_extent_buffer(gang[0]->node); - free_extent_buffer(gang[0]->commit_root); + else btrfs_put_root(gang[0]); - } } while (1) { @@ -2223,11 +2241,11 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); + log_tree_root->node = NULL; btrfs_put_root(log_tree_root); return ret; } else if (!extent_buffer_uptodate(log_tree_root->node)) { btrfs_err(fs_info, "failed to read log tree"); - free_extent_buffer(log_tree_root->node); btrfs_put_root(log_tree_root); return -EIO; } @@ -2236,7 +2254,6 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, if (ret) { btrfs_handle_fs_error(fs_info, ret, "Failed to recover log tree"); - free_extent_buffer(log_tree_root->node); btrfs_put_root(log_tree_root); return ret; } @@ -3901,8 +3918,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); if (root->reloc_root) { - free_extent_buffer(root->reloc_root->node); - free_extent_buffer(root->reloc_root->commit_root); btrfs_put_root(root->reloc_root); root->reloc_root = NULL; } @@ -3916,19 +3931,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, iput(root->ino_cache_inode); root->ino_cache_inode = NULL; } - btrfs_free_fs_root(root); -} - -void btrfs_free_fs_root(struct btrfs_root *root) -{ - WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); - if (root->anon_dev) - free_anon_bdev(root->anon_dev); - btrfs_drew_lock_destroy(&root->snapshot_lock); - free_extent_buffer(root->node); - free_extent_buffer(root->commit_root); - kfree(root->free_ino_ctl); - kfree(root->free_ino_pinned); btrfs_put_root(root); } @@ -4093,8 +4095,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); - btrfs_free_fs_roots(fs_info); - btrfs_put_block_group_cache(fs_info); /* @@ -4106,6 +4106,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); + btrfs_free_fs_roots(fs_info); /* * We must free the block groups after dropping the fs_roots as we could diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 59c885860bf8..cd629113f61c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -76,7 +76,6 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info); void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_free_fs_root(struct btrfs_root *root); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); @@ -98,20 +97,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) return NULL; } -static inline void btrfs_put_root(struct btrfs_root *root) -{ - if (!root) - return; - if (refcount_dec_and_test(&root->refs)) { -#ifdef CONFIG_BTRFS_DEBUG - spin_lock(&root->fs_info->fs_roots_radix_lock); - list_del_init(&root->leak_list); - spin_unlock(&root->fs_info->fs_roots_radix_lock); -#endif - kfree(root); - } -} - +void btrfs_put_root(struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 001605b672b8..3e7bd2388ed2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5485,13 +5485,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) } } - if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { + if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) btrfs_add_dropped_root(trans, root); - } else { - free_extent_buffer(root->node); - free_extent_buffer(root->commit_root); + else btrfs_put_root(root); - } root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 36af71040974..39e45b8a5031 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -64,12 +64,21 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info) struct extent_buffer *eb; unsigned long flags; + /* + * If we didn't get into open_ctree our allocated_ebs will not be + * initialized, so just skip this. + */ + if (!fs_info->allocated_ebs.next) + return; + spin_lock_irqsave(&fs_info->eb_leak_lock, flags); while (!list_empty(&fs_info->allocated_ebs)) { eb = list_first_entry(&fs_info->allocated_ebs, struct extent_buffer, leak_list); - pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n", - eb->start, eb->len, atomic_read(&eb->refs), eb->bflags); + pr_err( + "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n", + eb->start, eb->len, atomic_read(&eb->refs), eb->bflags, + btrfs_header_owner(eb)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -4875,7 +4884,6 @@ out_free_ulist: static void __free_extent_buffer(struct extent_buffer *eb) { - btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -4941,6 +4949,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) { btrfs_release_extent_buffer_pages(eb); + btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); __free_extent_buffer(eb); } @@ -5329,6 +5338,7 @@ static int release_extent_buffer(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); } + btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_pages(eb); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index bc43950eb32f..8b1f5c8897b7 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1251,8 +1251,6 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_free_tree_block(trans, free_space_root, free_space_root->node, 0, 1); - free_extent_buffer(free_space_root->node); - free_extent_buffer(free_space_root->commit_root); btrfs_put_root(free_space_root); return btrfs_commit_transaction(trans); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 75bc3b686498..c3888fb367e7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1038,11 +1038,8 @@ out_add_root: out_free_path: btrfs_free_path(path); out_free_root: - if (ret) { - free_extent_buffer(quota_root->node); - free_extent_buffer(quota_root->commit_root); + if (ret) btrfs_put_root(quota_root); - } out: if (ret) { ulist_free(fs_info->qgroup_ulist); @@ -1105,8 +1102,6 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_tree_unlock(quota_root->node); btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); - free_extent_buffer(quota_root->node); - free_extent_buffer(quota_root->commit_root); btrfs_put_root(quota_root); end_trans: diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2641b6c5c362..7f31dd57de54 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2579,10 +2579,6 @@ void free_reloc_roots(struct list_head *list) reloc_root = list_entry(list->next, struct btrfs_root, root_list); __del_reloc_root(reloc_root); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - reloc_root->node = NULL; - reloc_root->commit_root = NULL; } } diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 69c9afef06e3..42e62fd2809c 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -194,6 +194,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) cleanup_srcu_struct(&fs_info->subvol_srcu); kfree(fs_info->super_copy); btrfs_check_leaked_roots(fs_info); + btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->fs_devices); kfree(fs_info); } @@ -205,10 +206,6 @@ void btrfs_free_dummy_root(struct btrfs_root *root) /* Will be freed by btrfs_free_fs_roots */ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) return; - if (root->node) { - /* One for allocate_extent_buffer */ - free_extent_buffer(root->node); - } btrfs_put_root(root); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0eebd6a02d41..58c111474ba5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3299,7 +3299,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans, clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1, EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); - free_extent_buffer(log->node); btrfs_put_root(log); } @@ -6246,8 +6245,6 @@ again: ret = btrfs_pin_extent_for_log_replay(trans, log->node->start, log->node->len); - free_extent_buffer(log->node); - free_extent_buffer(log->commit_root); btrfs_put_root(log); if (!ret) @@ -6285,8 +6282,6 @@ again: wc.replay_dest->log_root = NULL; btrfs_put_root(wc.replay_dest); - free_extent_buffer(log->node); - free_extent_buffer(log->commit_root); btrfs_put_root(log); if (ret) @@ -6318,7 +6313,6 @@ next: if (ret) return ret; - free_extent_buffer(log_root_tree->node); log_root_tree->log_root = NULL; clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags); btrfs_put_root(log_root_tree); -- cgit From cd22a51c6650f567f297215dd7c47c3401ee7290 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Mar 2020 17:09:53 -0400 Subject: btrfs: do not use readahead for running delayed refs Readahead will generate a lot of extra reads for adjacent nodes, but when running delayed refs we have no idea if the next ref is going to be adjacent or not, so this potentially just generates a lot of extra IO. To make matters worse each ref is truly just looking for one item, it doesn't generally search forward, so we simply don't need it here. Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e7bd2388ed2..54a64d1e18c6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1447,7 +1447,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = READA_FORWARD; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes, @@ -1472,7 +1471,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - path->reada = READA_FORWARD; path->leave_spinning = 1; /* now insert the actual backref */ if (owner < BTRFS_FIRST_FREE_OBJECTID) { @@ -1589,7 +1587,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, } again: - path->reada = READA_FORWARD; path->leave_spinning = 1; ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1); if (ret < 0) { @@ -2978,7 +2975,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = READA_FORWARD; path->leave_spinning = 1; is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; -- cgit