diff options
Diffstat (limited to 'fs/btrfs/send.c')
| -rw-r--r-- | fs/btrfs/send.c | 417 | 
1 files changed, 331 insertions, 86 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 040324d71118..d8ccb62aa7d2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -24,6 +24,7 @@  #include "transaction.h"  #include "compression.h"  #include "xattr.h" +#include "print-tree.h"  /*   * Maximum number of references an extent can have in order for us to attempt to @@ -98,6 +99,15 @@ struct send_ctx {  	struct btrfs_key *cmp_key;  	/* +	 * Keep track of the generation of the last transaction that was used +	 * for relocating a block group. This is periodically checked in order +	 * to detect if a relocation happened since the last check, so that we +	 * don't operate on stale extent buffers for nodes (level >= 1) or on +	 * stale disk_bytenr values of file extent items. +	 */ +	u64 last_reloc_trans; + +	/*  	 * infos of the currently processed inode. In case of deleted inodes,  	 * these are the values from the deleted inode.  	 */ @@ -898,7 +908,6 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,  			     iterate_inode_ref_t iterate, void *ctx)  {  	struct extent_buffer *eb = path->nodes[0]; -	struct btrfs_item *item;  	struct btrfs_inode_ref *iref;  	struct btrfs_inode_extref *extref;  	struct btrfs_path *tmp_path; @@ -930,12 +939,11 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,  	if (found_key->type == BTRFS_INODE_REF_KEY) {  		ptr = (unsigned long)btrfs_item_ptr(eb, slot,  						    struct btrfs_inode_ref); -		item = btrfs_item_nr(slot); -		total = btrfs_item_size(eb, item); +		total = btrfs_item_size(eb, slot);  		elem_size = sizeof(*iref);  	} else {  		ptr = btrfs_item_ptr_offset(eb, slot); -		total = btrfs_item_size_nr(eb, slot); +		total = btrfs_item_size(eb, slot);  		elem_size = sizeof(*extref);  	} @@ -1004,7 +1012,7 @@ out:  typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,  				  const char *name, int name_len,  				  const char *data, int data_len, -				  u8 type, void *ctx); +				  void *ctx);  /*   * Helper function to iterate the entries in ONE btrfs_dir_item. @@ -1018,7 +1026,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  {  	int ret = 0;  	struct extent_buffer *eb; -	struct btrfs_item *item;  	struct btrfs_dir_item *di;  	struct btrfs_key di_key;  	char *buf = NULL; @@ -1030,7 +1037,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  	u32 total;  	int slot;  	int num; -	u8 type;  	/*  	 * Start with a small buffer (1 page). If later we end up needing more @@ -1047,20 +1053,18 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  	eb = path->nodes[0];  	slot = path->slots[0]; -	item = btrfs_item_nr(slot);  	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);  	cur = 0;  	len = 0; -	total = btrfs_item_size(eb, item); +	total = btrfs_item_size(eb, slot);  	num = 0;  	while (cur < total) {  		name_len = btrfs_dir_name_len(eb, di);  		data_len = btrfs_dir_data_len(eb, di); -		type = btrfs_dir_type(eb, di);  		btrfs_dir_item_key_to_cpu(eb, di, &di_key); -		if (type == BTRFS_FT_XATTR) { +		if (btrfs_dir_type(eb, di) == BTRFS_FT_XATTR) {  			if (name_len > XATTR_NAME_MAX) {  				ret = -ENAMETOOLONG;  				goto out; @@ -1110,7 +1114,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,  		cur += len;  		ret = iterate(num, &di_key, buf, name_len, buf + name_len, -				data_len, type, ctx); +			      data_len, ctx);  		if (ret < 0)  			goto out;  		if (ret) { @@ -1427,6 +1431,26 @@ static int find_extent_clone(struct send_ctx *sctx,  	if (ret < 0)  		goto out; +	down_read(&fs_info->commit_root_sem); +	if (fs_info->last_reloc_trans > sctx->last_reloc_trans) { +		/* +		 * A transaction commit for a transaction in which block group +		 * relocation was done just happened. +		 * The disk_bytenr of the file extent item we processed is +		 * possibly stale, referring to the extent's location before +		 * relocation. So act as if we haven't found any clone sources +		 * and fallback to write commands, which will read the correct +		 * data from the new extent location. Otherwise we will fail +		 * below because we haven't found our own back reference or we +		 * could be getting incorrect sources in case the old extent +		 * was already reallocated after the relocation. +		 */ +		up_read(&fs_info->commit_root_sem); +		ret = -ENOENT; +		goto out; +	} +	up_read(&fs_info->commit_root_sem); +  	if (!backref_ctx.found_itself) {  		/* found a bug in backref code? */  		ret = -EIO; @@ -1692,8 +1716,7 @@ out:   */  static int lookup_dir_item_inode(struct btrfs_root *root,  				 u64 dir, const char *name, int name_len, -				 u64 *found_inode, -				 u8 *found_type) +				 u64 *found_inode)  {  	int ret = 0;  	struct btrfs_dir_item *di; @@ -1716,7 +1739,6 @@ static int lookup_dir_item_inode(struct btrfs_root *root,  		goto out;  	}  	*found_inode = key.objectid; -	*found_type = btrfs_dir_type(path->nodes[0], di);  out:  	btrfs_free_path(path); @@ -1839,7 +1861,6 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,  	int ret = 0;  	u64 gen;  	u64 other_inode = 0; -	u8 other_type = 0;  	if (!sctx->parent_root)  		goto out; @@ -1867,7 +1888,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,  	}  	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, -			&other_inode, &other_type); +				    &other_inode);  	if (ret < 0 && ret != -ENOENT)  		goto out;  	if (ret) { @@ -1912,7 +1933,6 @@ static int did_overwrite_ref(struct send_ctx *sctx,  	int ret = 0;  	u64 gen;  	u64 ow_inode; -	u8 other_type;  	if (!sctx->parent_root)  		goto out; @@ -1936,7 +1956,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,  	/* check if the ref was overwritten by another ref */  	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, -			&ow_inode, &other_type); +				    &ow_inode);  	if (ret < 0 && ret != -ENOENT)  		goto out;  	if (ret) { @@ -3622,7 +3642,7 @@ static int is_ancestor(struct btrfs_root *root,  		    key.type != BTRFS_INODE_EXTREF_KEY)  			break; -		item_size = btrfs_item_size_nr(leaf, slot); +		item_size = btrfs_item_size(leaf, slot);  		while (cur_offset < item_size) {  			u64 parent;  			u64 parent_gen; @@ -4651,9 +4671,8 @@ out:  }  static int __process_new_xattr(int num, struct btrfs_key *di_key, -			       const char *name, int name_len, -			       const char *data, int data_len, -			       u8 type, void *ctx) +			       const char *name, int name_len, const char *data, +			       int data_len, void *ctx)  {  	int ret;  	struct send_ctx *sctx = ctx; @@ -4697,8 +4716,7 @@ out:  static int __process_deleted_xattr(int num, struct btrfs_key *di_key,  				   const char *name, int name_len, -				   const char *data, int data_len, -				   u8 type, void *ctx) +				   const char *data, int data_len, void *ctx)  {  	int ret;  	struct send_ctx *sctx = ctx; @@ -4743,10 +4761,8 @@ struct find_xattr_ctx {  	int found_data_len;  }; -static int __find_xattr(int num, struct btrfs_key *di_key, -			const char *name, int name_len, -			const char *data, int data_len, -			u8 type, void *vctx) +static int __find_xattr(int num, struct btrfs_key *di_key, const char *name, +			int name_len, const char *data, int data_len, void *vctx)  {  	struct find_xattr_ctx *ctx = vctx; @@ -4796,7 +4812,7 @@ static int find_xattr(struct btrfs_root *root,  static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,  				       const char *name, int name_len,  				       const char *data, int data_len, -				       u8 type, void *ctx) +				       void *ctx)  {  	int ret;  	struct send_ctx *sctx = ctx; @@ -4808,12 +4824,12 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,  			 &found_data_len);  	if (ret == -ENOENT) {  		ret = __process_new_xattr(num, di_key, name, name_len, data, -				data_len, type, ctx); +					  data_len, ctx);  	} else if (ret >= 0) {  		if (data_len != found_data_len ||  		    memcmp(data, found_data, data_len)) {  			ret = __process_new_xattr(num, di_key, name, name_len, -					data, data_len, type, ctx); +						  data, data_len, ctx);  		} else {  			ret = 0;  		} @@ -4826,7 +4842,7 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,  static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,  					   const char *name, int name_len,  					   const char *data, int data_len, -					   u8 type, void *ctx) +					   void *ctx)  {  	int ret;  	struct send_ctx *sctx = ctx; @@ -4835,7 +4851,7 @@ static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,  			 name, name_len, NULL, NULL);  	if (ret == -ENOENT)  		ret = __process_deleted_xattr(num, di_key, name, name_len, data, -				data_len, type, ctx); +					      data_len, ctx);  	else if (ret >= 0)  		ret = 0; @@ -6566,7 +6582,7 @@ static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,  	}  	leaf = path->nodes[0]; -	item_size = btrfs_item_size_nr(leaf, path->slots[0]); +	item_size = btrfs_item_size(leaf, path->slots[0]);  	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);  	while (cur_offset < item_size) {  		extref = (struct btrfs_inode_extref *)(ptr + @@ -6597,6 +6613,50 @@ static int changed_cb(struct btrfs_path *left_path,  {  	int ret = 0; +	/* +	 * We can not hold the commit root semaphore here. This is because in +	 * the case of sending and receiving to the same filesystem, using a +	 * pipe, could result in a deadlock: +	 * +	 * 1) The task running send blocks on the pipe because it's full; +	 * +	 * 2) The task running receive, which is the only consumer of the pipe, +	 *    is waiting for a transaction commit (for example due to a space +	 *    reservation when doing a write or triggering a transaction commit +	 *    when creating a subvolume); +	 * +	 * 3) The transaction is waiting to write lock the commit root semaphore, +	 *    but can not acquire it since it's being held at 1). +	 * +	 * Down this call chain we write to the pipe through kernel_write(). +	 * The same type of problem can also happen when sending to a file that +	 * is stored in the same filesystem - when reserving space for a write +	 * into the file, we can trigger a transaction commit. +	 * +	 * Our caller has supplied us with clones of leaves from the send and +	 * parent roots, so we're safe here from a concurrent relocation and +	 * further reallocation of metadata extents while we are here. Below we +	 * also assert that the leaves are clones. +	 */ +	lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem); + +	/* +	 * We always have a send root, so left_path is never NULL. We will not +	 * have a leaf when we have reached the end of the send root but have +	 * not yet reached the end of the parent root. +	 */ +	if (left_path->nodes[0]) +		ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, +				&left_path->nodes[0]->bflags)); +	/* +	 * When doing a full send we don't have a parent root, so right_path is +	 * NULL. When doing an incremental send, we may have reached the end of +	 * the parent root already, so we don't have a leaf at right_path. +	 */ +	if (right_path && right_path->nodes[0]) +		ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, +				&right_path->nodes[0]->bflags)); +  	if (result == BTRFS_COMPARE_TREE_SAME) {  		if (key->type == BTRFS_INODE_REF_KEY ||  		    key->type == BTRFS_INODE_EXTREF_KEY) { @@ -6643,14 +6703,46 @@ out:  	return ret;  } +static int search_key_again(const struct send_ctx *sctx, +			    struct btrfs_root *root, +			    struct btrfs_path *path, +			    const struct btrfs_key *key) +{ +	int ret; + +	if (!path->need_commit_sem) +		lockdep_assert_held_read(&root->fs_info->commit_root_sem); + +	/* +	 * Roots used for send operations are readonly and no one can add, +	 * update or remove keys from them, so we should be able to find our +	 * key again. The only exception is deduplication, which can operate on +	 * readonly roots and add, update or remove keys to/from them - but at +	 * the moment we don't allow it to run in parallel with send. +	 */ +	ret = btrfs_search_slot(NULL, root, key, path, 0, 0); +	ASSERT(ret <= 0); +	if (ret > 0) { +		btrfs_print_tree(path->nodes[path->lowest_level], false); +		btrfs_err(root->fs_info, +"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d", +			  key->objectid, key->type, key->offset, +			  (root == sctx->parent_root ? "parent" : "send"), +			  root->root_key.objectid, path->lowest_level, +			  path->slots[path->lowest_level]); +		return -EUCLEAN; +	} + +	return ret; +} +  static int full_send_tree(struct send_ctx *sctx)  {  	int ret;  	struct btrfs_root *send_root = sctx->send_root;  	struct btrfs_key key; +	struct btrfs_fs_info *fs_info = send_root->fs_info;  	struct btrfs_path *path; -	struct extent_buffer *eb; -	int slot;  	path = alloc_path_for_send();  	if (!path) @@ -6661,6 +6753,10 @@ static int full_send_tree(struct send_ctx *sctx)  	key.type = BTRFS_INODE_ITEM_KEY;  	key.offset = 0; +	down_read(&fs_info->commit_root_sem); +	sctx->last_reloc_trans = fs_info->last_reloc_trans; +	up_read(&fs_info->commit_root_sem); +  	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);  	if (ret < 0)  		goto out; @@ -6668,15 +6764,35 @@ static int full_send_tree(struct send_ctx *sctx)  		goto out_finish;  	while (1) { -		eb = path->nodes[0]; -		slot = path->slots[0]; -		btrfs_item_key_to_cpu(eb, &key, slot); +		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);  		ret = changed_cb(path, NULL, &key,  				 BTRFS_COMPARE_TREE_NEW, sctx);  		if (ret < 0)  			goto out; +		down_read(&fs_info->commit_root_sem); +		if (fs_info->last_reloc_trans > sctx->last_reloc_trans) { +			sctx->last_reloc_trans = fs_info->last_reloc_trans; +			up_read(&fs_info->commit_root_sem); +			/* +			 * A transaction used for relocating a block group was +			 * committed or is about to finish its commit. Release +			 * our path (leaf) and restart the search, so that we +			 * avoid operating on any file extent items that are +			 * stale, with a disk_bytenr that reflects a pre +			 * relocation value. This way we avoid as much as +			 * possible to fallback to regular writes when checking +			 * if we can clone file ranges. +			 */ +			btrfs_release_path(path); +			ret = search_key_again(sctx, send_root, path, &key); +			if (ret < 0) +				goto out; +		} else { +			up_read(&fs_info->commit_root_sem); +		} +  		ret = btrfs_next_item(send_root, path);  		if (ret < 0)  			goto out; @@ -6694,6 +6810,20 @@ out:  	return ret;  } +static int replace_node_with_clone(struct btrfs_path *path, int level) +{ +	struct extent_buffer *clone; + +	clone = btrfs_clone_extent_buffer(path->nodes[level]); +	if (!clone) +		return -ENOMEM; + +	free_extent_buffer(path->nodes[level]); +	path->nodes[level] = clone; + +	return 0; +} +  static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)  {  	struct extent_buffer *eb; @@ -6703,6 +6833,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen  	u64 reada_max;  	u64 reada_done = 0; +	lockdep_assert_held_read(&parent->fs_info->commit_root_sem); +  	BUG_ON(*level == 0);  	eb = btrfs_read_node_slot(parent, slot);  	if (IS_ERR(eb)) @@ -6726,6 +6858,10 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen  	path->nodes[*level - 1] = eb;  	path->slots[*level - 1] = 0;  	(*level)--; + +	if (*level == 0) +		return replace_node_with_clone(path, 0); +  	return 0;  } @@ -6739,8 +6875,10 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,  	path->slots[*level]++;  	while (path->slots[*level] >= nritems) { -		if (*level == root_level) +		if (*level == root_level) { +			path->slots[*level] = nritems - 1;  			return -1; +		}  		/* move upnext */  		path->slots[*level] = 0; @@ -6772,14 +6910,20 @@ static int tree_advance(struct btrfs_path *path,  	} else {  		ret = tree_move_down(path, level, reada_min_gen);  	} -	if (ret >= 0) { -		if (*level == 0) -			btrfs_item_key_to_cpu(path->nodes[*level], key, -					path->slots[*level]); -		else -			btrfs_node_key_to_cpu(path->nodes[*level], key, -					path->slots[*level]); -	} + +	/* +	 * Even if we have reached the end of a tree, ret is -1, update the key +	 * anyway, so that in case we need to restart due to a block group +	 * relocation, we can assert that the last key of the root node still +	 * exists in the tree. +	 */ +	if (*level == 0) +		btrfs_item_key_to_cpu(path->nodes[*level], key, +				      path->slots[*level]); +	else +		btrfs_node_key_to_cpu(path->nodes[*level], key, +				      path->slots[*level]); +  	return ret;  } @@ -6791,8 +6935,8 @@ static int tree_compare_item(struct btrfs_path *left_path,  	int len1, len2;  	unsigned long off1, off2; -	len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]); -	len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]); +	len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]); +	len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);  	if (len1 != len2)  		return 1; @@ -6809,6 +6953,97 @@ static int tree_compare_item(struct btrfs_path *left_path,  }  /* + * A transaction used for relocating a block group was committed or is about to + * finish its commit. Release our paths and restart the search, so that we are + * not using stale extent buffers: + * + * 1) For levels > 0, we are only holding references of extent buffers, without + *    any locks on them, which does not prevent them from having been relocated + *    and reallocated after the last time we released the commit root semaphore. + *    The exception are the root nodes, for which we always have a clone, see + *    the comment at btrfs_compare_trees(); + * + * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so + *    we are safe from the concurrent relocation and reallocation. However they + *    can have file extent items with a pre relocation disk_bytenr value, so we + *    restart the start from the current commit roots and clone the new leaves so + *    that we get the post relocation disk_bytenr values. Not doing so, could + *    make us clone the wrong data in case there are new extents using the old + *    disk_bytenr that happen to be shared. + */ +static int restart_after_relocation(struct btrfs_path *left_path, +				    struct btrfs_path *right_path, +				    const struct btrfs_key *left_key, +				    const struct btrfs_key *right_key, +				    int left_level, +				    int right_level, +				    const struct send_ctx *sctx) +{ +	int root_level; +	int ret; + +	lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem); + +	btrfs_release_path(left_path); +	btrfs_release_path(right_path); + +	/* +	 * Since keys can not be added or removed to/from our roots because they +	 * are readonly and we do not allow deduplication to run in parallel +	 * (which can add, remove or change keys), the layout of the trees should +	 * not change. +	 */ +	left_path->lowest_level = left_level; +	ret = search_key_again(sctx, sctx->send_root, left_path, left_key); +	if (ret < 0) +		return ret; + +	right_path->lowest_level = right_level; +	ret = search_key_again(sctx, sctx->parent_root, right_path, right_key); +	if (ret < 0) +		return ret; + +	/* +	 * If the lowest level nodes are leaves, clone them so that they can be +	 * safely used by changed_cb() while not under the protection of the +	 * commit root semaphore, even if relocation and reallocation happens in +	 * parallel. +	 */ +	if (left_level == 0) { +		ret = replace_node_with_clone(left_path, 0); +		if (ret < 0) +			return ret; +	} + +	if (right_level == 0) { +		ret = replace_node_with_clone(right_path, 0); +		if (ret < 0) +			return ret; +	} + +	/* +	 * Now clone the root nodes (unless they happen to be the leaves we have +	 * already cloned). This is to protect against concurrent snapshotting of +	 * the send and parent roots (see the comment at btrfs_compare_trees()). +	 */ +	root_level = btrfs_header_level(sctx->send_root->commit_root); +	if (root_level > 0) { +		ret = replace_node_with_clone(left_path, root_level); +		if (ret < 0) +			return ret; +	} + +	root_level = btrfs_header_level(sctx->parent_root->commit_root); +	if (root_level > 0) { +		ret = replace_node_with_clone(right_path, root_level); +		if (ret < 0) +			return ret; +	} + +	return 0; +} + +/*   * This function compares two trees and calls the provided callback for   * every changed/new/deleted item it finds.   * If shared tree blocks are encountered, whole subtrees are skipped, making @@ -6836,10 +7071,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  	int right_root_level;  	int left_level;  	int right_level; -	int left_end_reached; -	int right_end_reached; -	int advance_left; -	int advance_right; +	int left_end_reached = 0; +	int right_end_reached = 0; +	int advance_left = 0; +	int advance_right = 0;  	u64 left_blockptr;  	u64 right_blockptr;  	u64 left_gen; @@ -6907,12 +7142,18 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  	down_read(&fs_info->commit_root_sem);  	left_level = btrfs_header_level(left_root->commit_root);  	left_root_level = left_level; +	/* +	 * We clone the root node of the send and parent roots to prevent races +	 * with snapshot creation of these roots. Snapshot creation COWs the +	 * root node of a tree, so after the transaction is committed the old +	 * extent can be reallocated while this send operation is still ongoing. +	 * So we clone them, under the commit root semaphore, to be race free. +	 */  	left_path->nodes[left_level] =  			btrfs_clone_extent_buffer(left_root->commit_root);  	if (!left_path->nodes[left_level]) { -		up_read(&fs_info->commit_root_sem);  		ret = -ENOMEM; -		goto out; +		goto out_unlock;  	}  	right_level = btrfs_header_level(right_root->commit_root); @@ -6920,9 +7161,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  	right_path->nodes[right_level] =  			btrfs_clone_extent_buffer(right_root->commit_root);  	if (!right_path->nodes[right_level]) { -		up_read(&fs_info->commit_root_sem);  		ret = -ENOMEM; -		goto out; +		goto out_unlock;  	}  	/*  	 * Our right root is the parent root, while the left root is the "send" @@ -6932,7 +7172,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  	 * will need to read them at some point.  	 */  	reada_min_gen = btrfs_header_generation(right_root->commit_root); -	up_read(&fs_info->commit_root_sem);  	if (left_level == 0)  		btrfs_item_key_to_cpu(left_path->nodes[left_level], @@ -6947,11 +7186,26 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  		btrfs_node_key_to_cpu(right_path->nodes[right_level],  				&right_key, right_path->slots[right_level]); -	left_end_reached = right_end_reached = 0; -	advance_left = advance_right = 0; +	sctx->last_reloc_trans = fs_info->last_reloc_trans;  	while (1) { -		cond_resched(); +		if (need_resched() || +		    rwsem_is_contended(&fs_info->commit_root_sem)) { +			up_read(&fs_info->commit_root_sem); +			cond_resched(); +			down_read(&fs_info->commit_root_sem); +		} + +		if (fs_info->last_reloc_trans > sctx->last_reloc_trans) { +			ret = restart_after_relocation(left_path, right_path, +						       &left_key, &right_key, +						       left_level, right_level, +						       sctx); +			if (ret < 0) +				goto out_unlock; +			sctx->last_reloc_trans = fs_info->last_reloc_trans; +		} +  		if (advance_left && !left_end_reached) {  			ret = tree_advance(left_path, &left_level,  					left_root_level, @@ -6960,7 +7214,7 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  			if (ret == -1)  				left_end_reached = ADVANCE;  			else if (ret < 0) -				goto out; +				goto out_unlock;  			advance_left = 0;  		}  		if (advance_right && !right_end_reached) { @@ -6971,54 +7225,55 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  			if (ret == -1)  				right_end_reached = ADVANCE;  			else if (ret < 0) -				goto out; +				goto out_unlock;  			advance_right = 0;  		}  		if (left_end_reached && right_end_reached) {  			ret = 0; -			goto out; +			goto out_unlock;  		} else if (left_end_reached) {  			if (right_level == 0) { +				up_read(&fs_info->commit_root_sem);  				ret = changed_cb(left_path, right_path,  						&right_key,  						BTRFS_COMPARE_TREE_DELETED,  						sctx);  				if (ret < 0)  					goto out; +				down_read(&fs_info->commit_root_sem);  			}  			advance_right = ADVANCE;  			continue;  		} else if (right_end_reached) {  			if (left_level == 0) { +				up_read(&fs_info->commit_root_sem);  				ret = changed_cb(left_path, right_path,  						&left_key,  						BTRFS_COMPARE_TREE_NEW,  						sctx);  				if (ret < 0)  					goto out; +				down_read(&fs_info->commit_root_sem);  			}  			advance_left = ADVANCE;  			continue;  		}  		if (left_level == 0 && right_level == 0) { +			up_read(&fs_info->commit_root_sem);  			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);  			if (cmp < 0) {  				ret = changed_cb(left_path, right_path,  						&left_key,  						BTRFS_COMPARE_TREE_NEW,  						sctx); -				if (ret < 0) -					goto out;  				advance_left = ADVANCE;  			} else if (cmp > 0) {  				ret = changed_cb(left_path, right_path,  						&right_key,  						BTRFS_COMPARE_TREE_DELETED,  						sctx); -				if (ret < 0) -					goto out;  				advance_right = ADVANCE;  			} else {  				enum btrfs_compare_tree_result result; @@ -7032,11 +7287,13 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  					result = BTRFS_COMPARE_TREE_SAME;  				ret = changed_cb(left_path, right_path,  						 &left_key, result, sctx); -				if (ret < 0) -					goto out;  				advance_left = ADVANCE;  				advance_right = ADVANCE;  			} + +			if (ret < 0) +				goto out; +			down_read(&fs_info->commit_root_sem);  		} else if (left_level == right_level) {  			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);  			if (cmp < 0) { @@ -7076,6 +7333,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,  		}  	} +out_unlock: +	up_read(&fs_info->commit_root_sem);  out:  	btrfs_free_path(left_path);  	btrfs_free_path(right_path); @@ -7425,21 +7684,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)  	if (ret)  		goto out; -	spin_lock(&fs_info->send_reloc_lock); -	if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) { -		spin_unlock(&fs_info->send_reloc_lock); -		btrfs_warn_rl(fs_info, -		"cannot run send because a relocation operation is in progress"); -		ret = -EAGAIN; -		goto out; -	} -	fs_info->send_in_progress++; -	spin_unlock(&fs_info->send_reloc_lock); -  	ret = send_subvol(sctx); -	spin_lock(&fs_info->send_reloc_lock); -	fs_info->send_in_progress--; -	spin_unlock(&fs_info->send_reloc_lock);  	if (ret < 0)  		goto out;  |