diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
| -rw-r--r-- | fs/btrfs/ctree.c | 179 |
1 files changed, 119 insertions, 60 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2ee43b6a4f09..324df36d28bf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -12,6 +12,8 @@ #include "transaction.h" #include "print-tree.h" #include "locking.h" +#include "volumes.h" +#include "qgroup.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -44,11 +46,18 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i] || !p->locks[i]) continue; - btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]); - if (p->locks[i] == BTRFS_READ_LOCK) + /* + * If we currently have a spinning reader or writer lock this + * will bump the count of blocking holders and drop the + * spinlock. + */ + if (p->locks[i] == BTRFS_READ_LOCK) { + btrfs_set_lock_blocking_read(p->nodes[i]); p->locks[i] = BTRFS_READ_LOCK_BLOCKING; - else if (p->locks[i] == BTRFS_WRITE_LOCK) + } else if (p->locks[i] == BTRFS_WRITE_LOCK) { + btrfs_set_lock_blocking_write(p->nodes[i]); p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING; + } } } @@ -224,7 +233,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer_fsid(cow, fs_info->fsid); + write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); WARN_ON(btrfs_header_generation(buf) > trans->transid); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) @@ -967,6 +976,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, return 0; } +static struct extent_buffer *alloc_tree_block_no_bg_flush( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 parent_start, + const struct btrfs_disk_key *disk_key, + int level, + u64 hint, + u64 empty_size) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *ret; + + /* + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. + * COWing can result in allocation of a new chunk, and flushing pending + * block groups (btrfs_create_pending_block_groups()) can be triggered + * when finishing allocation of a new chunk. Creation of a pending block + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. + * For similar reasons, we also need to delay flushing pending block + * groups when splitting a leaf or node, from one of those trees, since + * we are holding a write lock on it and its parent or when inserting a + * new root node for one of those trees. + */ + if (root == fs_info->extent_root || + root == fs_info->chunk_root || + root == fs_info->dev_root || + root == fs_info->free_space_root) + trans->can_flush_pending_bgs = false; + + ret = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, disk_key, level, + hint, empty_size); + trans->can_flush_pending_bgs = true; + + return ret; +} + /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked @@ -1014,9 +1065,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; - cow = btrfs_alloc_tree_block(trans, root, parent_start, - root->root_key.objectid, &disk_key, level, - search_start, empty_size); + cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -1033,7 +1083,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer_fsid(cow, fs_info->fsid); + write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { @@ -1246,7 +1296,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, return eb; btrfs_set_path_blocking(path); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + btrfs_set_lock_blocking_read(eb); if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); @@ -1273,7 +1323,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); - extent_buffer_get(eb_rewin); btrfs_tree_read_lock(eb_rewin); __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); WARN_ON(btrfs_header_nritems(eb_rewin) > @@ -1337,7 +1386,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(fs_info, logical); } else { - btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); + btrfs_set_lock_blocking_read(eb_root); eb = btrfs_clone_extent_buffer(eb_root); btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); @@ -1345,7 +1394,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) if (!eb) return NULL; - extent_buffer_get(eb); btrfs_tree_read_lock(eb); if (old_root) { btrfs_set_header_bytenr(eb, eb->start); @@ -1398,7 +1446,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, * * What is forced COW: * when we create snapshot during committing the transaction, - * after we've finished coping src root, we must COW the shared + * after we've finished copying src root, we must COW the shared * block to ensure the metadata consistency. */ if (btrfs_header_generation(buf) == trans->transid && @@ -1424,6 +1472,10 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start; int ret; + if (test_bit(BTRFS_ROOT_DELETING, &root->state)) + btrfs_err(fs_info, + "COW'ing blocks on a fs root that's being dropped"); + if (trans->transaction != fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", trans->transid, @@ -1442,9 +1494,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, search_start = buf->start & ~((u64)SZ_1G - 1); if (parent) - btrfs_set_lock_blocking(parent); - btrfs_set_lock_blocking(buf); + btrfs_set_lock_blocking_write(parent); + btrfs_set_lock_blocking_write(buf); + /* + * Before CoWing this block for later modification, check if it's + * the subtree root and do the delayed subtree trace if needed. + * + * Also We don't care about the error, as it's handled internally. + */ + btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); @@ -1538,7 +1597,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (parent_nritems <= 1) return 0; - btrfs_set_lock_blocking(parent); + btrfs_set_lock_blocking_write(parent); for (i = start_slot; i <= end_slot; i++) { struct btrfs_key first_key; @@ -1597,7 +1656,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = last_block; btrfs_tree_lock(cur); - btrfs_set_lock_blocking(cur); + btrfs_set_lock_blocking_write(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, @@ -1812,7 +1871,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } btrfs_tree_lock(child); - btrfs_set_lock_blocking(child); + btrfs_set_lock_blocking_write(child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child); if (ret) { btrfs_tree_unlock(child); @@ -1850,7 +1909,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (left) { btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); + btrfs_set_lock_blocking_write(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left); if (wret) { @@ -1865,7 +1924,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (right) { btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); + btrfs_set_lock_blocking_write(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right); if (wret) { @@ -2028,7 +2087,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, u32 left_nr; btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); + btrfs_set_lock_blocking_write(left); left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { @@ -2083,7 +2142,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, u32 right_nr; btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); + btrfs_set_lock_blocking_write(right); right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { @@ -2485,26 +2544,6 @@ done: return ret; } -static void key_search_validate(struct extent_buffer *b, - const struct btrfs_key *key, - int level) -{ -#ifdef CONFIG_BTRFS_ASSERT - struct btrfs_disk_key disk_key; - - btrfs_cpu_key_to_disk(&disk_key, key); - - if (level == 0) - ASSERT(!memcmp_extent_buffer(b, &disk_key, - offsetof(struct btrfs_leaf, items[0].key), - sizeof(disk_key))); - else - ASSERT(!memcmp_extent_buffer(b, &disk_key, - offsetof(struct btrfs_node, ptrs[0].key), - sizeof(disk_key))); -#endif -} - static int key_search(struct extent_buffer *b, const struct btrfs_key *key, int level, int *prev_cmp, int *slot) { @@ -2513,7 +2552,6 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key, return *prev_cmp; } - key_search_validate(b, key, level); *slot = 0; return 0; @@ -2567,14 +2605,27 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, root_lock = BTRFS_READ_LOCK; if (p->search_commit_root) { - /* The commit roots are read only so we always do read locks */ - if (p->need_commit_sem) + /* + * The commit roots are read only so we always do read locks, + * and we always must hold the commit_root_sem when doing + * searches on them, the only exception is send where we don't + * want to block transaction commits for a long time, so + * we need to clone the commit root in order to avoid races + * with transaction commits that create a snapshot of one of + * the roots used by a send operation. + */ + if (p->need_commit_sem) { down_read(&fs_info->commit_root_sem); - b = root->commit_root; - extent_buffer_get(b); - level = btrfs_header_level(b); - if (p->need_commit_sem) + b = btrfs_clone_extent_buffer(root->commit_root); up_read(&fs_info->commit_root_sem); + if (!b) + return ERR_PTR(-ENOMEM); + + } else { + b = root->commit_root; + extent_buffer_get(b); + } + level = btrfs_header_level(b); /* * Ensure that all callers have set skip_locking when * p->search_commit_root = 1. @@ -2700,6 +2751,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, again: prev_cmp = -1; b = btrfs_search_slot_get_root(root, p, write_lock_level); + if (IS_ERR(b)) { + ret = PTR_ERR(b); + goto done; + } while (b) { level = btrfs_header_level(b); @@ -2944,6 +2999,8 @@ again: */ prev_cmp = -1; ret = key_search(b, key, level, &prev_cmp, &slot); + if (ret < 0) + goto done; if (level != 0) { int dec = 0; @@ -3306,8 +3363,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &lower_key, level, root->node->start, 0); + c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -3436,8 +3493,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -3710,7 +3767,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); + btrfs_set_lock_blocking_write(right); free_space = btrfs_leaf_free_space(fs_info, right); if (free_space < data_size) @@ -3734,7 +3791,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root /* Key greater than all keys in the leaf, right neighbor has * enough room for it and we're not emptying our leaf to delete * it, therefore use right neighbor to insert the new item and - * no need to touch/dirty our left leaft. */ + * no need to touch/dirty our left leaf. */ btrfs_tree_unlock(left); free_extent_buffer(left); path->nodes[0] = right; @@ -3944,7 +4001,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); + btrfs_set_lock_blocking_write(left); free_space = btrfs_leaf_free_space(fs_info, left); if (free_space < data_size) { @@ -4221,8 +4278,8 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -5095,6 +5152,10 @@ again: nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); sret = btrfs_bin_search(cur, min_key, level, &slot); + if (sret < 0) { + ret = sret; + goto out; + } /* at the lowest level, we're done, setup the path and exit */ if (level == path->lowest_level) { @@ -5373,7 +5434,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, ret = -ENOMEM; goto out; } - extent_buffer_get(left_path->nodes[left_level]); right_level = btrfs_header_level(right_root->commit_root); right_root_level = right_level; @@ -5384,7 +5444,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root, ret = -ENOMEM; goto out; } - extent_buffer_get(right_path->nodes[right_level]); up_read(&fs_info->commit_root_sem); if (left_level == 0) |