From 161c3549b45aeef05451b6822d8aaaf39c7bedce Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Sep 2015 16:17:39 -0400 Subject: Btrfs: change how we wait for pending ordered extents We have a mechanism to make sure we don't lose updates for ordered extents that were logged in the transaction that is currently running. We add the ordered extent to a transaction list and then the transaction waits on all the ordered extents in that list. However are substantially large file systems this list can be extremely large, and can give us soft lockups, since the ordered extents don't remove themselves from the list when they do complete. To fix this we simply add a counter to the transaction that is incremented any time we have a logged extent that needs to be completed in the current transaction. Then when the ordered extent finally completes it decrements the per transaction counter and wakes up the transaction if we are the last ones. This will eliminate the softlockup. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9354e7a1247f..68a56c3cc555 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -232,6 +232,7 @@ loop: extwriter_counter_init(cur_trans, type); init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + init_waitqueue_head(&cur_trans->pending_wait); cur_trans->state = TRANS_STATE_RUNNING; /* * One for this trans handle, one so it will live on until we @@ -239,6 +240,7 @@ loop: */ atomic_set(&cur_trans->use_count, 2); cur_trans->have_free_bgs = 0; + atomic_set(&cur_trans->pending_ordered, 0); cur_trans->start_time = get_seconds(); cur_trans->dirty_bg_run = 0; @@ -266,7 +268,6 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_snapshots); INIT_LIST_HEAD(&cur_trans->pending_chunks); INIT_LIST_HEAD(&cur_trans->switch_commits); - INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); INIT_LIST_HEAD(&cur_trans->dropped_roots); @@ -551,7 +552,6 @@ again: h->can_flush_pending_bgs = true; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); - INIT_LIST_HEAD(&h->ordered); smp_mb(); if (cur_trans->state >= TRANS_STATE_BLOCKED && @@ -784,12 +784,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); - if (!list_empty(&trans->ordered)) { - spin_lock(&info->trans_lock); - list_splice_init(&trans->ordered, &cur_trans->pending_ordered); - spin_unlock(&info->trans_lock); - } - trans->delayed_ref_updates = 0; if (!trans->sync) { must_run_delayed_refs = @@ -1788,25 +1782,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) } static inline void -btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans, - struct btrfs_fs_info *fs_info) +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans) { - struct btrfs_ordered_extent *ordered; - - spin_lock(&fs_info->trans_lock); - while (!list_empty(&cur_trans->pending_ordered)) { - ordered = list_first_entry(&cur_trans->pending_ordered, - struct btrfs_ordered_extent, - trans_list); - list_del_init(&ordered->trans_list); - spin_unlock(&fs_info->trans_lock); - - wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE, - &ordered->flags)); - btrfs_put_ordered_extent(ordered); - spin_lock(&fs_info->trans_lock); - } - spin_unlock(&fs_info->trans_lock); + wait_event(cur_trans->pending_wait, + atomic_read(&cur_trans->pending_ordered) == 0); } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -1890,7 +1869,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); @@ -1949,7 +1927,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_wait_delalloc_flush(root->fs_info); - btrfs_wait_pending_ordered(cur_trans, root->fs_info); + btrfs_wait_pending_ordered(cur_trans); btrfs_scrub_pause(root); /* -- cgit v1.2.3-73-gaa49b From 3204d33cda40d9bc97f257c441225d3713916661 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Sep 2015 10:46:10 -0400 Subject: Btrfs: add a flags field to btrfs_transaction I want to set some per transaction flags, so instead of adding yet another int lets just convert the current two int indicators to flags and add a flags field for future use. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/transaction.c | 18 ++++++++---------- fs/btrfs/transaction.h | 9 ++++----- fs/btrfs/volumes.c | 2 +- 4 files changed, 16 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/transaction.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e32abf53b5b..e0dfe3578b00 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4066,7 +4066,8 @@ commit_trans: if (IS_ERR(trans)) return PTR_ERR(trans); if (have_pinned_space >= 0 || - trans->transaction->have_free_bgs || + test_bit(BTRFS_TRANS_HAVE_FREE_BGS, + &trans->transaction->flags) || need_commit > 0) { ret = btrfs_commit_transaction(trans, root); if (ret) @@ -8938,7 +8939,7 @@ again: * back off and let this transaction commit */ mutex_lock(&root->fs_info->ro_block_group_mutex); - if (trans->transaction->dirty_bg_run) { + if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { u64 transid = trans->transid; mutex_unlock(&root->fs_info->ro_block_group_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 68a56c3cc555..222f9a99a3ce 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -239,10 +239,9 @@ loop: * commit the transaction. */ atomic_set(&cur_trans->use_count, 2); - cur_trans->have_free_bgs = 0; atomic_set(&cur_trans->pending_ordered, 0); + cur_trans->flags = 0; cur_trans->start_time = get_seconds(); - cur_trans->dirty_bg_run = 0; memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); @@ -1837,7 +1836,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } - if (!cur_trans->dirty_bg_run) { + if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) { int run_it = 0; /* this mutex is also taken before trying to set @@ -1846,18 +1845,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, * after a extents from that block group have been * allocated for cache files. btrfs_set_block_group_ro * will wait for the transaction to commit if it - * finds dirty_bg_run = 1 + * finds BTRFS_TRANS_DIRTY_BG_RUN set. * - * The dirty_bg_run flag is also used to make sure only - * one process starts all the block group IO. It wouldn't + * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure + * only one process starts all the block group IO. It wouldn't * hurt to have more than one go through, but there's no * real advantage to it either. */ mutex_lock(&root->fs_info->ro_block_group_mutex); - if (!cur_trans->dirty_bg_run) { + if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, + &cur_trans->flags)) run_it = 1; - cur_trans->dirty_bg_run = 1; - } mutex_unlock(&root->fs_info->ro_block_group_mutex); if (run_it) @@ -2127,7 +2125,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - if (cur_trans->have_free_bgs) + if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags)) btrfs_clear_space_info_full(root->fs_info); root->fs_info->last_trans_committed = cur_trans->transid; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index bf7b1ddf5993..109b8b7fb48e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -32,6 +32,9 @@ enum btrfs_trans_state { TRANS_STATE_MAX = 6, }; +#define BTRFS_TRANS_HAVE_FREE_BGS 0 +#define BTRFS_TRANS_DIRTY_BG_RUN 1 + struct btrfs_transaction { u64 transid; /* @@ -48,10 +51,7 @@ struct btrfs_transaction { atomic_t use_count; atomic_t pending_ordered; - /* - * true if there is free bgs operations in this transaction - */ - int have_free_bgs; + unsigned long flags; /* Be protected by fs_info->trans_lock when we want to change it. */ enum btrfs_trans_state state; @@ -81,7 +81,6 @@ struct btrfs_transaction { spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; - int dirty_bg_run; }; #define __TRANS_FREEZABLE (1U << 0) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e875b6cc1e20..f86d83805b44 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1462,7 +1462,7 @@ again: btrfs_std_error(root->fs_info, ret, "Failed to remove dev extent item"); } else { - trans->transaction->have_free_bgs = 1; + set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); } out: btrfs_free_path(path); -- cgit v1.2.3-73-gaa49b