From 2ab28f322f9896782da904f5942f3873432addc8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2012 15:27:49 -0400 Subject: Btrfs: wait on ordered extents at the last possible moment Since we don't actually copy the extent information from the source tree in the fast case we don't need to wait for ordered io to be completed in order to fsync, we just need to wait for the io to be completed. So when we're logging our file just attach all of the ordered extents to the log, and then when the log syncs just wait for IO_DONE on the ordered extents and then write the super. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9027bb1e7466..7de720d22b74 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2281,6 +2281,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, unsigned long log_transid = 0; mutex_lock(&root->log_mutex); + log_transid = root->log_transid; index1 = root->log_transid % 2; if (atomic_read(&root->log_commit[index1])) { wait_log_commit(trans, root, root->log_transid); @@ -2308,11 +2309,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* bail out if we need to do a full commit */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { ret = -EAGAIN; + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&root->log_mutex); goto out; } - log_transid = root->log_transid; if (log_transid % 2 == 0) mark = EXTENT_DIRTY; else @@ -2324,6 +2325,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); if (ret) { btrfs_abort_transaction(trans, root, ret); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&root->log_mutex); goto out; } @@ -2363,6 +2365,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } root->fs_info->last_trans_log_full_commit = trans->transid; btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out; @@ -2373,6 +2376,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = 0; goto out; @@ -2392,6 +2396,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out_wake_log_root; @@ -2402,10 +2407,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW); if (ret) { btrfs_abort_transaction(trans, root, ret); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_wait_logged_extents(log, log_transid); btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); @@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); } + /* + * We may have short-circuited the log tree with the full commit logic + * and left ordered 
extents on our list, so clear these out to keep us + * from leaking inodes and memory. + */ + btrfs_free_logged_extents(log, 0); + btrfs_free_logged_extents(log, 1); + free_extent_buffer(log->node); kfree(log); } @@ -3271,14 +3286,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans, struct btrfs_root *log = root->log_root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; + struct btrfs_ordered_extent *ordered; struct list_head ordered_sums; struct btrfs_map_token token; struct btrfs_key key; - u64 csum_offset = em->mod_start - em->start; - u64 csum_len = em->mod_len; + u64 mod_start = em->mod_start; + u64 mod_len = em->mod_len; + u64 csum_offset; + u64 csum_len; u64 extent_offset = em->start - em->orig_start; u64 block_len; int ret; + int index = log->log_transid % 2; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; INIT_LIST_HEAD(&ordered_sums); @@ -3362,6 +3381,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans, csum_len = block_len; } + /* + * First check and see if our csums are on our outstanding ordered + * extents. + */ +again: + spin_lock_irq(&log->log_extents_lock[index]); + list_for_each_entry(ordered, &log->logged_list[index], log_list) { + struct btrfs_ordered_sum *sum; + + if (!mod_len) + break; + + if (ordered->inode != inode) + continue; + + if (ordered->file_offset + ordered->len <= mod_start || + mod_start + mod_len <= ordered->file_offset) + continue; + + /* + * We are going to copy all the csums on this ordered extent, so + * go ahead and adjust mod_start and mod_len in case this + * ordered extent has already been logged. + */ + if (ordered->file_offset > mod_start) { + if (ordered->file_offset + ordered->len >= + mod_start + mod_len) + mod_len = ordered->file_offset - mod_start; + /* + * If we have this case + * + * |--------- logged extent ---------| + * |----- ordered extent ----| + * + * Just don't mess with mod_start and mod_len, we'll + * just end up logging more csums than we need and it + * will be ok. + */ + } else { + if (ordered->file_offset + ordered->len < + mod_start + mod_len) { + mod_len = (mod_start + mod_len) - + (ordered->file_offset + ordered->len); + mod_start = ordered->file_offset + + ordered->len; + } else { + mod_len = 0; + } + } + + /* + * To keep us from looping for the above case of an ordered + * extent that falls inside of the logged extent. + */ + if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, + &ordered->flags)) + continue; + atomic_inc(&ordered->refs); + spin_unlock_irq(&log->log_extents_lock[index]); + /* + * we've dropped the lock, we must either break or + * start over after this. + */ + + wait_event(ordered->wait, ordered->csum_bytes_left == 0); + + list_for_each_entry(sum, &ordered->list, list) { + ret = btrfs_csum_file_blocks(trans, log, sum); + if (ret) { + btrfs_put_ordered_extent(ordered); + goto unlocked; + } + } + btrfs_put_ordered_extent(ordered); + goto again; + + } + spin_unlock_irq(&log->log_extents_lock[index]); +unlocked: + + if (!mod_len || ret) + return ret; + + csum_offset = mod_start - em->start; + csum_len = mod_len; + /* block start is already adjusted for the file extent offset. 
*/ ret = btrfs_lookup_csums_range(log->fs_info->csum_root, em->block_start + csum_offset, @@ -3393,6 +3498,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; u64 test_gen; int ret = 0; + int num = 0; INIT_LIST_HEAD(&extents); @@ -3401,16 +3507,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, list_for_each_entry_safe(em, n, &tree->modified_extents, list) { list_del_init(&em->list); + + /* + * Just an arbitrary number, this can be really CPU intensive + * once we start getting a lot of extents, and really once we + * have a bunch of extents we just want to commit since it will + * be faster. + */ + if (++num > 32768) { + list_del_init(&tree->modified_extents); + ret = -EFBIG; + goto process; + } + if (em->generation <= test_gen) continue; /* Need a ref to keep it from getting evicted from cache */ atomic_inc(&em->refs); set_bit(EXTENT_FLAG_LOGGING, &em->flags); list_add_tail(&em->list, &extents); + num++; } list_sort(NULL, &extents, extent_cmp); +process: while (!list_empty(&extents)) { em = list_entry(extents.next, struct extent_map, list); @@ -3513,6 +3634,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&BTRFS_I(inode)->log_mutex); + btrfs_get_logged_extents(log, inode); + /* * a brute force approach to making sure we get the most uptodate * copies of everything. @@ -3656,6 +3779,8 @@ log_extents: BTRFS_I(inode)->logged_trans = trans->transid; BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; out_unlock: + if (err) + btrfs_free_logged_extents(log, log->log_transid); mutex_unlock(&BTRFS_I(inode)->log_mutex); btrfs_free_path(path); @@ -3822,7 +3947,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, end_trans: dput(old_parent); if (ret < 0) { - WARN_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; ret = 1; } -- cgit From dcfac4156fa102c1bab0e4e31df37e47278292f6 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 27 Dec 2012 09:01:20 +0000 Subject: Btrfs: kill unused argument of btrfs_pin_extent_for_log_replay Argument 'trans' is not used any more. 
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/extent-tree.c | 3 +-- fs/btrfs/tree-log.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 411c8d97074e..22f012d41fd0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2939,8 +2939,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_bytes, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); -int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, u64 bytenr, u64 num_bytes); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a162c7cb8fb5..825f23b13b58 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4898,8 +4898,7 @@ int btrfs_pin_extent(struct btrfs_root *root, /* * this function must be called within transaction */ -int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { struct btrfs_block_group_cache *cache; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7de720d22b74..deb7f66356bd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_pin_extent_for_log_replay(wc->trans, - log->fs_info->extent_root, + btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, eb->start, eb->len); if (btrfs_buffer_uptodate(eb, gen, 0)) { -- cgit From de78b51a2852bddccd6535e9e12de65f92787a1e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 31 Jan 2013 18:21:12 +0000 Subject: btrfs: remove cache only arguments from defrag path The entry point at the defrag ioctl always sets "cache only" to 0; the codepaths haven't run for a long time as far as I can tell. Chris says they're dead code, so remove them. 
Signed-off-by: Eric Sandeen Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 65 ++++++++------------------------------------------ fs/btrfs/ctree.h | 8 +++---- fs/btrfs/ioctl.c | 8 +++---- fs/btrfs/transaction.c | 8 +++---- fs/btrfs/transaction.h | 2 +- fs/btrfs/tree-defrag.c | 19 ++++++--------- fs/btrfs/tree-log.c | 4 ++-- 7 files changed, 32 insertions(+), 82 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35444013f0cf..ecd25a1b4e51 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1442,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int start_slot, int cache_only, u64 *last_ret, + int start_slot, u64 *last_ret, struct btrfs_key *progress) { struct extent_buffer *cur; @@ -1462,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; parent_level = btrfs_header_level(parent); - if (cache_only && parent_level != 1) - return 0; WARN_ON(trans->transaction != root->fs_info->running_transaction); WARN_ON(trans->transid != root->fs_info->generation); @@ -1509,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, else uptodate = 0; if (!cur || !uptodate) { - if (cache_only) { - free_extent_buffer(cur); - continue; - } if (!cur) { cur = read_tree_block(root, blocknr, blocksize, gen); @@ -4826,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) /* * A helper function to walk down the tree starting at min_key, and looking - * for nodes or leaves that are either in cache or have a minimum - * transaction id. This is used by the btree defrag code, and tree logging + * for nodes or leaves that are have a minimum transaction id. + * This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the @@ -4848,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_key *max_key, - struct btrfs_path *path, int cache_only, + struct btrfs_path *path, u64 min_trans) { struct extent_buffer *cur; @@ -4888,15 +4882,12 @@ again: if (sret && slot > 0) slot--; /* - * check this node pointer against the cache_only and - * min_trans parameters. If it isn't in cache or is too - * old, skip to the next one. + * check this node pointer against the min_trans parameters. + * If it is too old, old, skip to the next one. 
*/ while (slot < nritems) { u64 blockptr; u64 gen; - struct extent_buffer *tmp; - struct btrfs_disk_key disk_key; blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); @@ -4904,27 +4895,7 @@ again: slot++; continue; } - if (!cache_only) - break; - - if (max_key) { - btrfs_node_key(cur, &disk_key, slot); - if (comp_keys(&disk_key, max_key) >= 0) { - ret = 1; - goto out; - } - } - - tmp = btrfs_find_tree_block(root, blockptr, - btrfs_level_size(root, level - 1)); - - if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) { - free_extent_buffer(tmp); - break; - } - if (tmp) - free_extent_buffer(tmp); - slot++; + break; } find_next_key: /* @@ -4935,7 +4906,7 @@ find_next_key: path->slots[level] = slot; btrfs_set_path_blocking(path); sret = btrfs_find_next_key(root, path, min_key, level, - cache_only, min_trans); + min_trans); if (sret == 0) { btrfs_release_path(path); goto again; @@ -5400,8 +5371,7 @@ out: /* * this is similar to btrfs_next_leaf, but does not try to preserve * and fixup the path. It looks for and returns the next key in the - * tree based on the current path and the cache_only and min_trans - * parameters. + * tree based on the current path and the min_trans parameters. * * 0 is returned if another key is found, < 0 if there are any errors * and 1 is returned if there are no higher keys in the tree @@ -5410,8 +5380,7 @@ out: * calling this function. */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int level, - int cache_only, u64 min_trans) + struct btrfs_key *key, int level, u64 min_trans) { int slot; struct extent_buffer *c; @@ -5462,22 +5431,8 @@ next: if (level == 0) btrfs_item_key_to_cpu(c, key, slot); else { - u64 blockptr = btrfs_node_blockptr(c, slot); u64 gen = btrfs_node_ptr_generation(c, slot); - if (cache_only) { - struct extent_buffer *cur; - cur = btrfs_find_tree_block(root, blockptr, - btrfs_level_size(root, level - 1)); - if (!cur || - btrfs_buffer_uptodate(cur, gen, 1) <= 0) { - slot++; - if (cur) - free_extent_buffer(cur); - goto next; - } - free_extent_buffer(cur); - } if (gen < min_trans) { slot++; goto next; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1e3a994e1899..7e2cffd2a5d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3125,10 +3125,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, - int cache_only, u64 min_trans); + u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_key *max_key, - struct btrfs_path *path, int cache_only, + struct btrfs_path *path, u64 min_trans); enum btrfs_compare_tree_result { BTRFS_COMPARE_TREE_NEW, @@ -3181,7 +3181,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root, int find_higher, int return_any); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int start_slot, int cache_only, u64 *last_ret, + int start_slot, u64 *last_ret, struct btrfs_key *progress); void btrfs_release_path(struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -3576,7 +3576,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int cache_only); + struct btrfs_root *root); /* sysfs.c */ int 
btrfs_init_sysfs(void); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 96ecefc1724f..daea831f3d36 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -818,7 +818,7 @@ static int find_new_extents(struct btrfs_root *root, while(1) { ret = btrfs_search_forward(root, &min_key, &max_key, - path, 0, newer_than); + path, newer_than); if (ret != 0) goto none; if (min_key.objectid != ino) @@ -1864,7 +1864,7 @@ static noinline int search_ioctl(struct inode *inode, path->keep_locks = 1; while(1) { - ret = btrfs_search_forward(root, &key, &max_key, path, 0, + ret = btrfs_search_forward(root, &key, &max_key, path, sk->min_transid); if (ret != 0) { if (ret > 0) @@ -2212,10 +2212,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EPERM; goto out; } - ret = btrfs_defrag_root(root, 0); + ret = btrfs_defrag_root(root); if (ret) goto out; - ret = btrfs_defrag_root(root->fs_info->extent_root, 0); + ret = btrfs_defrag_root(root->fs_info->extent_root); break; case S_IFREG: if (!(file->f_mode & FMODE_WRITE)) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5144ad19ef47..60481a53e004 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -959,10 +959,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, } /* - * defrag a given btree. If cacheonly == 1, this won't read from the disk, - * otherwise every leaf in the btree is read and defragged. + * defrag a given btree. + * Every leaf in the btree is read and defragged. */ -int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) +int btrfs_defrag_root(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; struct btrfs_trans_handle *trans; @@ -976,7 +976,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_defrag_leaves(trans, root, cacheonly); + ret = btrfs_defrag_leaves(trans, root); btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(info->tree_root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 69700f7b20ac..46628210e5d8 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -117,7 +117,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_add_dead_root(struct btrfs_root *root); -int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); +int btrfs_defrag_root(struct btrfs_root *root); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3b580ee8ab1d..94e05c1f118a 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,13 +23,14 @@ #include "transaction.h" #include "locking.h" -/* defrag all the leaves in a given btree. If cache_only == 1, don't read - * things from disk, otherwise read all the leaves and try to get key order to +/* + * Defrag all the leaves in a given btree. 
+ * Read all the leaves and try to get key order to * better reflect disk order */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int cache_only) + struct btrfs_root *root) { struct btrfs_path *path = NULL; struct btrfs_key key; @@ -41,9 +42,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, u64 last_ret = 0; u64 min_trans = 0; - if (cache_only) - goto out; - if (root->fs_info->extent_root == root) { /* * there's recursion here right now in the tree locking, @@ -86,11 +84,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } path->keep_locks = 1; - if (cache_only) - min_trans = root->defrag_trans_start; - ret = btrfs_search_forward(root, &key, NULL, path, - cache_only, min_trans); + ret = btrfs_search_forward(root, &key, NULL, path, min_trans); if (ret < 0) goto out; if (ret > 0) { @@ -109,11 +104,11 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, + next_key_ret = btrfs_find_next_key(root, path, &key, 1, min_trans); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, - cache_only, &last_ret, + &last_ret, &root->defrag_progress); if (ret) { WARN_ON(ret == -EAGAIN); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index deb7f66356bd..1a79087c4575 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2738,7 +2738,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->keep_locks = 1; ret = btrfs_search_forward(root, &min_key, &max_key, - path, 0, trans->transid); + path, trans->transid); /* * we didn't find anything from this transaction, see if there @@ -3680,7 +3680,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, while (1) { ins_nr = 0; ret = btrfs_search_forward(root, &min_key, &max_key, - path, 0, trans->transid); + path, trans->transid); if (ret != 0) break; again: -- cgit From fda2832febb1928da0625b2c5d15559b29d7e740 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 26 Feb 2013 08:10:22 +0000 Subject: btrfs: cleanup for open-coded alignment Though most of the btrfs codes are using ALIGN macro for page alignment, there are still some codes using open-coded alignment like the following: ------ u64 mask = ((u64)root->stripesize - 1); u64 ret = (val + mask) & ~mask; ------ Or even hidden one: ------ num_bytes = (end - start + blocksize) & ~(blocksize - 1); ------ Sometimes these open-coded alignment is not so easy to understand for newbie like me. This commit changes the open-coded alignment to the ALIGN macro for a better readability. Also there is a previous patch from David Sterba with similar changes, but the patch is for 3.2 kernel and seems not merged. 
http://www.spinics.net/lists/linux-btrfs/msg12747.html Cc: David Sterba Signed-off-by: Qu Wenruo Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 9 +++------ fs/btrfs/extent_io.c | 8 ++++---- fs/btrfs/file.c | 3 +-- fs/btrfs/inode.c | 37 +++++++++++++++---------------------- fs/btrfs/tree-log.c | 3 +-- fs/btrfs/volumes.c | 3 +-- 6 files changed, 25 insertions(+), 38 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8520354f086e..5681a91ed400 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3431,7 +3431,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) int ret = 0, committed = 0, alloc_chunk = 1; /* make sure bytes are sectorsize aligned */ - bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + bytes = ALIGN(bytes, root->sectorsize); if (root == root->fs_info->tree_root || BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { @@ -3526,7 +3526,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) struct btrfs_space_info *data_sinfo; /* make sure bytes are sectorsize aligned */ - bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + bytes = ALIGN(bytes, root->sectorsize); data_sinfo = root->fs_info->data_sinfo; spin_lock(&data_sinfo->lock); @@ -5607,10 +5607,7 @@ static u64 stripe_align(struct btrfs_root *root, struct btrfs_block_group_cache *cache, u64 val, u64 num_bytes) { - u64 mask; - u64 ret; - mask = ((u64)root->stripesize - 1); - ret = (val + mask) & ~mask; + u64 ret = ALIGN(val, root->stripesize); return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66f999b97cbb..597ab8966c80 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2686,7 +2686,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, iosize = min(extent_map_end(em) - cur, end - cur + 1); cur_end = min(extent_map_end(em) - 1, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + iosize = ALIGN(iosize, blocksize); if (this_bio_flag & EXTENT_BIO_COMPRESSED) { disk_io_size = em->block_len; sector = em->block_start >> 9; @@ -2977,7 +2977,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, BUG_ON(extent_map_end(em) <= cur); BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + iosize = ALIGN(iosize, blocksize); sector = (em->block_start + extent_offset) >> 9; bdev = em->bdev; block_start = em->block_start; @@ -3664,7 +3664,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; - start += (offset + blocksize - 1) & ~(blocksize - 1); + start += ALIGN(offset, blocksize); if (start > end) return 0; @@ -3783,7 +3783,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, len = last - offset; if (len == 0) break; - len = (len + sectorsize - 1) & ~(sectorsize - 1); + len = ALIGN(len, sectorsize); em = get_extent(inode, NULL, 0, offset, len, 0); if (IS_ERR_OR_NULL(em)) return em; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6e6dd8cdad92..83c790d84038 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -510,8 +510,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); - num_bytes = (write_bytes + pos - start_pos + - root->sectorsize - 1) & 
~((u64)root->sectorsize - 1); + num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index be09654e11b9..9ef7a5b1b77e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -233,8 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, u64 isize = i_size_read(inode); u64 actual_end = min(end + 1, isize); u64 inline_len = actual_end - start; - u64 aligned_end = (end + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); + u64 aligned_end = ALIGN(end, root->sectorsize); u64 data_len = inline_len; int ret; @@ -391,7 +390,7 @@ again: * a compressed extent to 128k. */ total_compressed = min(total_compressed, max_uncompressed); - num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); total_in = 0; ret = 0; @@ -490,15 +489,13 @@ cont: * up to a block size boundary so the allocator does sane * things */ - total_compressed = (total_compressed + blocksize - 1) & - ~(blocksize - 1); + total_compressed = ALIGN(total_compressed, blocksize); /* * one last check to make sure the compression is really a * win, compare the page count read with the blocks on disk */ - total_in = (total_in + PAGE_CACHE_SIZE - 1) & - ~(PAGE_CACHE_SIZE - 1); + total_in = ALIGN(total_in, PAGE_CACHE_SIZE); if (total_compressed >= total_in) { will_compress = 0; } else { @@ -856,7 +853,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, BUG_ON(btrfs_is_free_space_inode(inode)); - num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); disk_num_bytes = num_bytes; @@ -4015,7 +4012,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; - u64 mask = root->sectorsize - 1; u32 found_type = (u8)-1; int found_extent; int del_item; @@ -4039,7 +4035,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * extent just the way it is. 
*/ if (root->ref_cows || root == root->fs_info->tree_root) - btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); + btrfs_drop_extent_cache(inode, ALIGN(new_size, + root->sectorsize), (u64)-1, 0); /* * This function is also used to drop the items in the log tree before @@ -4118,10 +4115,9 @@ search_again: if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - extent_num_bytes = new_size - - found_key.offset + root->sectorsize - 1; - extent_num_bytes = extent_num_bytes & - ~((u64)root->sectorsize - 1); + extent_num_bytes = ALIGN(new_size - + found_key.offset, + root->sectorsize); btrfs_set_file_extent_num_bytes(leaf, fi, extent_num_bytes); num_dec = (orig_num_bytes - @@ -4357,9 +4353,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) struct extent_map *em = NULL; struct extent_state *cached_state = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->sectorsize - 1; - u64 hole_start = (oldsize + mask) & ~mask; - u64 block_end = (size + mask) & ~mask; + u64 hole_start = ALIGN(oldsize, root->sectorsize); + u64 block_end = ALIGN(size, root->sectorsize); u64 last_byte; u64 cur_offset; u64 hole_size; @@ -4392,7 +4387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) break; } last_byte = min(extent_map_end(em), block_end); - last_byte = (last_byte + mask) & ~mask; + last_byte = ALIGN(last_byte , root->sectorsize); if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { struct extent_map *hole_em; hole_size = last_byte - cur_offset; @@ -6111,8 +6106,7 @@ again: } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size_t size; size = btrfs_file_extent_inline_len(leaf, item); - extent_end = (extent_start + size + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); + extent_end = ALIGN(extent_start + size, root->sectorsize); } if (start >= extent_end) { @@ -6184,8 +6178,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, size - extent_offset); em->start = extent_start + extent_offset; - em->len = (copy_size + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); + em->len = ALIGN(copy_size, root->sectorsize); em->orig_block_len = em->len; em->orig_start = em->start; if (compress_type) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1a79087c4575..e8b7a68e1b37 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -484,7 +484,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, struct btrfs_key *key) { int found_type; - u64 mask = root->sectorsize - 1; u64 extent_end; u64 start = key->offset; u64 saved_nbytes; @@ -501,7 +500,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = start + btrfs_file_extent_num_bytes(eb, item); else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); - extent_end = (start + size + mask) & ~mask; + extent_end = ALIGN(start + size, root->sectorsize); } else { ret = 0; goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 538c5cfa005f..db72e0cc6f87 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4556,8 +4556,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = 1; stripe_index = 0; stripe_nr_orig = stripe_nr; - stripe_nr_end = (offset + *length + map->stripe_len - 1) & - (~(map->stripe_len - 1)); + stripe_nr_end = ALIGN(offset + *length, map->stripe_len); do_div(stripe_nr_end, map->stripe_len); stripe_end_offset = stripe_nr_end * map->stripe_len - 
(offset + *length); -- cgit From 3321719ed67440bba1b0c5ae19c30d640263ccc8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 27 Feb 2013 13:28:24 +0000 Subject: Btrfs: fix memory leak of log roots When we abort a transaction while fsyncing, we'll skip freeing log roots part of committing a transaction, which leads to memory leak. This adds a 'free log roots' in putting super when no more users hold references on log roots, so it's safe and clean. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 5 +++++ fs/btrfs/tree-log.c | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb7c14308521..5031e6dd5938 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3253,6 +3253,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + btrfs_free_log(NULL, root); + btrfs_free_log_root_tree(NULL, fs_info); + } + __btrfs_remove_free_space_cache(root->free_ino_pinned); __btrfs_remove_free_space_cache(root->free_ino_ctl); free_fs_root(root); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e8b7a68e1b37..8e85e0e4333d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2466,8 +2466,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans, .process_func = process_one_buffer }; - ret = walk_log_tree(trans, log, &wc); - BUG_ON(ret); + if (trans) { + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + } while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, -- cgit From 124fe663f93162d17b7e391705cac122101e93d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2013 11:47:21 -0500 Subject: Btrfs: delete inline extents when we find them during logging Apparently when we do inline extents we allow the data to overlap the last chunk of the btrfs_file_extent_item, which means that we can possibly have a btrfs_file_extent_item that isn't actually as large as a btrfs_file_extent_item. This messes with us when we try to overwrite the extent when logging new extents since we expect for it to be the right size. To fix this just delete the item and try to do the insert again which will give us the proper sized btrfs_file_extent_item. This fixes a panic where map_private_extent_buffer would blow up because we're trying to write past the end of the leaf. Thanks, Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8e85e0e4333d..c7ef569eb22a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3300,6 +3300,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int index = log->log_transid % 2; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; +insert: INIT_LIST_HEAD(&ordered_sums); btrfs_init_map_token(&token); key.objectid = btrfs_ino(inode); @@ -3315,6 +3316,23 @@ static int log_one_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + + /* + * If we are overwriting an inline extent with a real one then we need + * to just delete the inline extent as it may not be large enough to + * have the entire file_extent_item. 
+ */ + if (ret && btrfs_token_file_extent_type(leaf, fi, &token) == + BTRFS_FILE_EXTENT_INLINE) { + ret = btrfs_del_item(trans, log, path); + btrfs_release_path(path); + if (ret) { + path->really_keep_locks = 0; + return ret; + } + goto insert; + } + btrfs_set_token_file_extent_generation(leaf, fi, em->generation, &token); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { -- cgit From 9bf7a4890518186238d2579be16ecc5190a707c0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2013 13:35:47 -0500 Subject: Btrfs: use set_nlink if our i_nlink is 0 We need to inc the nlink of deleted entries when running replay so we can do the unlink on the fs_root and get everything cleaned up and then have the orphan cleanup do the right thing. The problem is inc_nlink complains about this, even thought it still does the right thing. So use set_nlink() if our i_nlink is 0 to keep users from seeing the warnings during log replay. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7ef569eb22a..451fad96ecd1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - btrfs_inc_nlink(inode); + if (!inode->i_nlink) + set_nlink(inode, 1); + else + btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; -- cgit
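The open-coded-alignment cleanup above (the Qu Wenruo commit) hinges on one identity: for a power-of-two alignment, "(val + mask) & ~mask" with mask = alignment - 1 rounds val up to the next multiple of the alignment, which is exactly what the kernel's ALIGN() helper expresses. As a rough illustration — a standalone userspace sketch, not code from any of the patches above — the program below assumes that standard power-of-two definition (redefined locally as DEMO_ALIGN so it builds outside the kernel) and checks that it matches the open-coded pattern the commit removes; the sector and stripe sizes are made-up demo values.

/*
 * Editor's sketch: verify that the round-up macro form used after the
 * cleanup produces the same result as the open-coded mask arithmetic it
 * replaces.  DEMO_ALIGN mirrors the usual power-of-two ALIGN() semantics
 * but is a local definition for this demo only.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Round x up to the next multiple of a; a must be a power of two. */
#define DEMO_ALIGN(x, a) (((x) + ((a) - 1)) & ~(uint64_t)((a) - 1))

int main(void)
{
	const uint64_t sectorsize = 4096;   /* typical btrfs sector size (assumed) */
	const uint64_t stripesize = 65536;  /* hypothetical stripe size            */
	uint64_t val;

	for (val = 0; val < 262144; val += 511) {
		/* Open-coded form used before the cleanup ... */
		uint64_t mask = sectorsize - 1;
		uint64_t open_coded = (val + mask) & ~mask;

		/* ... and the macro form the patch switches to. */
		assert(open_coded == DEMO_ALIGN(val, sectorsize));
		assert(((val + stripesize - 1) & ~(stripesize - 1)) ==
		       DEMO_ALIGN(val, stripesize));
	}

	printf("round-up example: DEMO_ALIGN(5000, 4096) = %llu\n",
	       (unsigned long long)DEMO_ALIGN(5000, sectorsize));
	return 0;
}

Compiled and run, the program prints 8192 for the example value, matching the behavior the patch preserves while making call sites like ALIGN(bytes, root->sectorsize) easier to read than the equivalent mask expressions.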