diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 270 | 
1 files changed, 91 insertions, 179 deletions
| diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f8220ec02036..1650dc44a5e3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,  	key.type = BTRFS_INODE_ITEM_KEY;  	key.offset = 0;  	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); -	if (IS_ERR(inode)) { +	if (IS_ERR(inode))  		inode = NULL; -	} else if (is_bad_inode(inode)) { -		iput(inode); -		inode = NULL; -	}  	return inode;  } @@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,  		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)  			nbytes = 0;  	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) { -		size = btrfs_file_extent_inline_len(eb, slot, item); +		size = btrfs_file_extent_ram_bytes(eb, item);  		nbytes = btrfs_file_extent_ram_bytes(eb, item);  		extent_end = ALIGN(start + size,  				   fs_info->sectorsize); @@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,  		 * as the owner of the file extent changed from log tree  		 * (doesn't affect qgroup) to fs/file tree(affects qgroup)  		 */ -		ret = btrfs_qgroup_trace_extent(trans, fs_info, +		ret = btrfs_qgroup_trace_extent(trans,  				btrfs_file_extent_disk_bytenr(eb, item),  				btrfs_file_extent_disk_num_bytes(eb, item),  				GFP_NOFS); @@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,  				 * allocation tree  				 */  				ret = btrfs_alloc_logged_file_extent(trans, -						fs_info,  						root->root_key.objectid,  						key->objectid, offset, &ins);  				if (ret) @@ -1291,6 +1286,46 @@ again:  	return ret;  } +static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir, +				  const u8 ref_type, const char *name, +				  const int namelen) +{ +	struct btrfs_key key; +	struct btrfs_path *path; +	const u64 parent_id = btrfs_ino(BTRFS_I(dir)); +	int ret; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	key.objectid = btrfs_ino(BTRFS_I(inode)); +	key.type = ref_type; +	if (key.type == BTRFS_INODE_REF_KEY) +		key.offset = parent_id; +	else +		key.offset = btrfs_extref_hash(parent_id, name, namelen); + +	ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0); +	if (ret < 0) +		goto out; +	if (ret > 0) { +		ret = 0; +		goto out; +	} +	if (key.type == BTRFS_INODE_EXTREF_KEY) +		ret = btrfs_find_name_in_ext_backref(path->nodes[0], +						     path->slots[0], parent_id, +						     name, namelen, NULL); +	else +		ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], +						 name, namelen, NULL); + +out: +	btrfs_free_path(path); +	return ret; +} +  /*   * replay one inode back reference item found in the log tree.   * eb, slot and key refer to the buffer and key found in the log tree. @@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,  				}  			} +			/* +			 * If a reference item already exists for this inode +			 * with the same parent and name, but different index, +			 * drop it and the corresponding directory index entries +			 * from the parent before adding the new reference item +			 * and dir index entries, otherwise we would fail with +			 * -EEXIST returned from btrfs_add_link() below. +			 */ +			ret = btrfs_inode_ref_exists(inode, dir, key->type, +						     name, namelen); +			if (ret > 0) { +				ret = btrfs_unlink_inode(trans, root, +							 BTRFS_I(dir), +							 BTRFS_I(inode), +							 name, namelen); +				/* +				 * If we dropped the link count to 0, bump it so +				 * that later the iput() on the inode will not +				 * free it. We will fixup the link count later. +				 */ +				if (!ret && inode->i_nlink == 0) +					inc_nlink(inode); +			} +			if (ret < 0) +				goto out; +  			/* insert our name */  			ret = btrfs_add_link(trans, BTRFS_I(dir),  					BTRFS_I(inode), @@ -2120,7 +2181,7 @@ again:  						     dir_key->offset,  						     name, name_len, 0);  		} -		if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { +		if (!log_di || log_di == ERR_PTR(-ENOENT)) {  			btrfs_dir_item_key_to_cpu(eb, di, &location);  			btrfs_release_path(path);  			btrfs_release_path(log_path); @@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  	/* bail out if we need to do a full commit */  	if (btrfs_need_log_full_commit(fs_info, trans)) {  		ret = -EAGAIN; -		btrfs_free_logged_extents(log, log_transid);  		mutex_unlock(&root->log_mutex);  		goto out;  	} @@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  	if (ret) {  		blk_finish_plug(&plug);  		btrfs_abort_transaction(trans, ret); -		btrfs_free_logged_extents(log, log_transid);  		btrfs_set_log_full_commit(fs_info, trans);  		mutex_unlock(&root->log_mutex);  		goto out; @@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  			goto out;  		}  		btrfs_wait_tree_log_extents(log, mark); -		btrfs_free_logged_extents(log, log_transid);  		mutex_unlock(&log_root_tree->log_mutex);  		ret = -EAGAIN;  		goto out; @@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  	if (atomic_read(&log_root_tree->log_commit[index2])) {  		blk_finish_plug(&plug);  		ret = btrfs_wait_tree_log_extents(log, mark); -		btrfs_wait_logged_extents(trans, log, log_transid);  		wait_log_commit(log_root_tree,  				root_log_ctx.log_transid);  		mutex_unlock(&log_root_tree->log_mutex); @@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  	if (btrfs_need_log_full_commit(fs_info, trans)) {  		blk_finish_plug(&plug);  		btrfs_wait_tree_log_extents(log, mark); -		btrfs_free_logged_extents(log, log_transid);  		mutex_unlock(&log_root_tree->log_mutex);  		ret = -EAGAIN;  		goto out_wake_log_root; @@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  	if (ret) {  		btrfs_set_log_full_commit(fs_info, trans);  		btrfs_abort_transaction(trans, ret); -		btrfs_free_logged_extents(log, log_transid);  		mutex_unlock(&log_root_tree->log_mutex);  		goto out_wake_log_root;  	} @@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,  						  EXTENT_NEW | EXTENT_DIRTY);  	if (ret) {  		btrfs_set_log_full_commit(fs_info, trans); -		btrfs_free_logged_extents(log, log_transid);  		mutex_unlock(&log_root_tree->log_mutex);  		goto out_wake_log_root;  	} -	btrfs_wait_logged_extents(trans, log, log_transid);  	btrfs_set_super_log_root(fs_info->super_for_commit,  				 log_root_tree->node->start); @@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,  				  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);  	} -	/* -	 * We may have short-circuited the log tree with the full commit logic -	 * and left ordered extents on our list, so clear these out to keep us -	 * from leaking inodes and memory. -	 */ -	btrfs_free_logged_extents(log, 0); -	btrfs_free_logged_extents(log, 1); -  	free_extent_buffer(log->node);  	kfree(log);  } @@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,  			       int start_slot, int nr, int inode_only,  			       u64 logged_isize)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = trans->fs_info;  	unsigned long src_offset;  	unsigned long dst_offset;  	struct btrfs_root *log = inode->root->log_root; @@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,  					struct btrfs_file_extent_item);  		if (btrfs_file_extent_type(src, extent) ==  		    BTRFS_FILE_EXTENT_INLINE) { -			len = btrfs_file_extent_inline_len(src, -							   src_path->slots[0], -							   extent); +			len = btrfs_file_extent_ram_bytes(src, extent);  			*last_extent = ALIGN(key.offset + len,  					     fs_info->sectorsize);  		} else { @@ -4004,7 +4047,7 @@ fill_holes:  		extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);  		if (btrfs_file_extent_type(src, extent) ==  		    BTRFS_FILE_EXTENT_INLINE) { -			len = btrfs_file_extent_inline_len(src, i, extent); +			len = btrfs_file_extent_ram_bytes(src, extent);  			extent_end = ALIGN(key.offset + len,  					   fs_info->sectorsize);  		} else { @@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)  	return 0;  } -static int wait_ordered_extents(struct btrfs_trans_handle *trans, -				struct inode *inode, -				struct btrfs_root *root, -				const struct extent_map *em, -				const struct list_head *logged_list, -				bool *ordered_io_error) +static int log_extent_csums(struct btrfs_trans_handle *trans, +			    struct btrfs_inode *inode, +			    struct btrfs_root *log_root, +			    const struct extent_map *em)  { -	struct btrfs_fs_info *fs_info = root->fs_info; -	struct btrfs_ordered_extent *ordered; -	struct btrfs_root *log = root->log_root; -	u64 mod_start = em->mod_start; -	u64 mod_len = em->mod_len; -	const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;  	u64 csum_offset;  	u64 csum_len;  	LIST_HEAD(ordered_sums);  	int ret = 0; -	*ordered_io_error = false; - -	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || +	if (inode->flags & BTRFS_INODE_NODATASUM || +	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||  	    em->block_start == EXTENT_MAP_HOLE)  		return 0; -	/* -	 * Wait far any ordered extent that covers our extent map. If it -	 * finishes without an error, first check and see if our csums are on -	 * our outstanding ordered extents. -	 */ -	list_for_each_entry(ordered, logged_list, log_list) { -		struct btrfs_ordered_sum *sum; - -		if (!mod_len) -			break; - -		if (ordered->file_offset + ordered->len <= mod_start || -		    mod_start + mod_len <= ordered->file_offset) -			continue; - -		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && -		    !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && -		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { -			const u64 start = ordered->file_offset; -			const u64 end = ordered->file_offset + ordered->len - 1; - -			WARN_ON(ordered->inode != inode); -			filemap_fdatawrite_range(inode->i_mapping, start, end); -		} - -		wait_event(ordered->wait, -			   (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || -			    test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); - -		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { -			/* -			 * Clear the AS_EIO/AS_ENOSPC flags from the inode's -			 * i_mapping flags, so that the next fsync won't get -			 * an outdated io error too. -			 */ -			filemap_check_errors(inode->i_mapping); -			*ordered_io_error = true; -			break; -		} -		/* -		 * We are going to copy all the csums on this ordered extent, so -		 * go ahead and adjust mod_start and mod_len in case this -		 * ordered extent has already been logged. -		 */ -		if (ordered->file_offset > mod_start) { -			if (ordered->file_offset + ordered->len >= -			    mod_start + mod_len) -				mod_len = ordered->file_offset - mod_start; -			/* -			 * If we have this case -			 * -			 * |--------- logged extent ---------| -			 *       |----- ordered extent ----| -			 * -			 * Just don't mess with mod_start and mod_len, we'll -			 * just end up logging more csums than we need and it -			 * will be ok. -			 */ -		} else { -			if (ordered->file_offset + ordered->len < -			    mod_start + mod_len) { -				mod_len = (mod_start + mod_len) - -					(ordered->file_offset + ordered->len); -				mod_start = ordered->file_offset + -					ordered->len; -			} else { -				mod_len = 0; -			} -		} - -		if (skip_csum) -			continue; - -		/* -		 * To keep us from looping for the above case of an ordered -		 * extent that falls inside of the logged extent. -		 */ -		if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, -				     &ordered->flags)) -			continue; - -		list_for_each_entry(sum, &ordered->list, list) { -			ret = btrfs_csum_file_blocks(trans, log, sum); -			if (ret) -				break; -		} -	} - -	if (*ordered_io_error || !mod_len || ret || skip_csum) -		return ret; - +	/* If we're compressed we have to save the entire range of csums. */  	if (em->compress_type) {  		csum_offset = 0;  		csum_len = max(em->block_len, em->orig_block_len);  	} else { -		csum_offset = mod_start - em->start; -		csum_len = mod_len; +		csum_offset = em->mod_start - em->start; +		csum_len = em->mod_len;  	}  	/* block start is already adjusted for the file extent offset. */ -	ret = btrfs_lookup_csums_range(fs_info->csum_root, +	ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,  				       em->block_start + csum_offset,  				       em->block_start + csum_offset +  				       csum_len - 1, &ordered_sums, 0); @@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,  						   struct btrfs_ordered_sum,  						   list);  		if (!ret) -			ret = btrfs_csum_file_blocks(trans, log, sums); +			ret = btrfs_csum_file_blocks(trans, log_root, sums);  		list_del(&sums->list);  		kfree(sums);  	} @@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,  			  struct btrfs_inode *inode, struct btrfs_root *root,  			  const struct extent_map *em,  			  struct btrfs_path *path, -			  const struct list_head *logged_list,  			  struct btrfs_log_ctx *ctx)  {  	struct btrfs_root *log = root->log_root; @@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,  	u64 block_len;  	int ret;  	int extent_inserted = 0; -	bool ordered_io_err = false; -	ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, -			logged_list, &ordered_io_err); +	ret = log_extent_csums(trans, inode, log, em);  	if (ret)  		return ret; -	if (ordered_io_err) { -		ctx->io_err = -EIO; -		return ctx->io_err; -	} -  	btrfs_init_map_token(&token);  	ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, @@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,  				     struct btrfs_root *root,  				     struct btrfs_inode *inode,  				     struct btrfs_path *path, -				     struct list_head *logged_list,  				     struct btrfs_log_ctx *ctx,  				     const u64 start,  				     const u64 end) @@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,  	}  	list_sort(NULL, &extents, extent_cmp); -	btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); -	/* -	 * Some ordered extents started by fsync might have completed -	 * before we could collect them into the list logged_list, which -	 * means they're gone, not in our logged_list nor in the inode's -	 * ordered tree. We want the application/user space to know an -	 * error happened while attempting to persist file data so that -	 * it can take proper action. If such error happened, we leave -	 * without writing to the log tree and the fsync must report the -	 * file data write error and not commit the current transaction. -	 */ -	ret = filemap_check_errors(inode->vfs_inode.i_mapping); -	if (ret) -		ctx->io_err = ret;  process:  	while (!list_empty(&extents)) {  		em = list_entry(extents.next, struct extent_map, list); @@ -4512,8 +4433,7 @@ process:  		write_unlock(&tree->lock); -		ret = log_one_extent(trans, inode, root, em, path, logged_list, -				     ctx); +		ret = log_one_extent(trans, inode, root, em, path, ctx);  		write_lock(&tree->lock);  		clear_em_logging(tree, em);  		free_extent_map(em); @@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,  		if (btrfs_file_extent_type(leaf, extent) ==  		    BTRFS_FILE_EXTENT_INLINE) { -			len = btrfs_file_extent_inline_len(leaf, -							   path->slots[0], -							   extent); +			len = btrfs_file_extent_ram_bytes(leaf, extent);  			ASSERT(len == i_size ||  			       (len == fs_info->sectorsize &&  				btrfs_file_extent_compression(leaf, extent) != @@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,  	struct btrfs_key min_key;  	struct btrfs_key max_key;  	struct btrfs_root *log = root->log_root; -	LIST_HEAD(logged_list);  	u64 last_extent = 0;  	int err = 0;  	int ret; @@ -5094,8 +5011,7 @@ again:  				 * we don't need to do more work nor fallback to  				 * a transaction commit.  				 */ -				if (IS_ERR(other_inode) && -				    PTR_ERR(other_inode) == -ENOENT) { +				if (other_inode == ERR_PTR(-ENOENT)) {  					goto next_key;  				} else if (IS_ERR(other_inode)) {  					err = PTR_ERR(other_inode); @@ -5235,7 +5151,7 @@ log_extents:  	}  	if (fast_search) {  		ret = btrfs_log_changed_extents(trans, root, inode, dst_path, -						&logged_list, ctx, start, end); +						ctx, start, end);  		if (ret) {  			err = ret;  			goto out_unlock; @@ -5286,10 +5202,6 @@ log_extents:  	inode->last_log_commit = inode->last_sub_trans;  	spin_unlock(&inode->lock);  out_unlock: -	if (unlikely(err)) -		btrfs_put_logged_extents(&logged_list); -	else -		btrfs_submit_logged_extents(&logged_list, log);  	mutex_unlock(&inode->log_mutex);  	btrfs_free_path(path); @@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,  				 struct btrfs_inode *inode,  				 struct btrfs_log_ctx *ctx)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = trans->fs_info;  	int ret;  	struct btrfs_path *path;  	struct btrfs_key key; @@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,  			struct btrfs_inode *inode, struct btrfs_inode *old_dir,  			struct dentry *parent)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); +	struct btrfs_fs_info *fs_info = trans->fs_info;  	/*  	 * this will force the logging code to walk the dentry chain |